[clang] [libcxx] [llvm] [mlir] [libc++] implement `std::flat_multimap` (PR #113835)

via cfe-commits cfe-commits at lists.llvm.org
Sat Nov 23 11:48:37 PST 2024


https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/113835

>From 9dc6f16d829c64d2f08817ed3c07700e29bb252d Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Mac.broadband>
Date: Sat, 23 Nov 2024 18:54:14 +0000
Subject: [PATCH 01/12] [libc++] Move `__key_value_iterator` out (for reusing it in flat_multimap)

---
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__flat_map/flat_map.h          | 154 ++--------------
 .../include/__flat_map/key_value_iterator.h   | 170 ++++++++++++++++++
 libcxx/include/flat_map                       |   1 +
 libcxx/include/module.modulemap               |   1 +
 .../iter_iter_stability.pass.cpp              |   1 +
 .../reverse_iterator.pass.cpp                 |   2 +-
 .../flat.map/incomplete_type.pass.cpp         |   1 +
 8 files changed, 193 insertions(+), 138 deletions(-)
 create mode 100644 libcxx/include/__flat_map/key_value_iterator.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 0ae031e5365aef..b37b00165fd6ac 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -358,6 +358,7 @@ set(files
   __filesystem/space_info.h
   __filesystem/u8path.h
   __flat_map/flat_map.h
+  __flat_map/key_value_iterator.h
   __flat_map/sorted_unique.h
   __format/buffer.h
   __format/concepts.h
diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h
index 58b362ad7a706f..13877749d2c5f4 100644
--- a/libcxx/include/__flat_map/flat_map.h
+++ b/libcxx/include/__flat_map/flat_map.h
@@ -23,11 +23,11 @@
 #include <__algorithm/remove_if.h>
 #include <__assert>
 #include <__compare/synth_three_way.h>
-#include <__concepts/convertible_to.h>
 #include <__concepts/swappable.h>
 #include <__config>
 #include <__cstddef/byte.h>
 #include <__cstddef/ptrdiff_t.h>
+#include <__flat_map/key_value_iterator.h>
 #include <__flat_map/sorted_unique.h>
 #include <__functional/invoke.h>
 #include <__functional/is_transparent.h>
@@ -38,7 +38,6 @@
 #include <__iterator/next.h>
 #include <__iterator/ranges_iterator_traits.h>
 #include <__iterator/reverse_iterator.h>
-#include <__memory/addressof.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/uses_allocator.h>
 #include <__memory/uses_allocator_construction.h>
@@ -57,8 +56,8 @@
 #include <__type_traits/is_allocator.h>
 #include <__type_traits/is_nothrow_constructible.h>
 #include <__type_traits/is_same.h>
-#include <__type_traits/maybe_const.h>
 #include <__utility/exception_guard.h>
+#include <__utility/move.h>
 #include <__utility/pair.h>
 #include <__utility/scope_guard.h>
 #include <__vector/vector.h>
@@ -82,9 +81,6 @@ template <class _Key,
           class _KeyContainer    = vector<_Key>,
           class _MappedContainer = vector<_Tp>>
 class flat_map {
-  template <bool _Const>
-  struct __iterator;
-
   template <class, class, class, class, class>
   friend class flat_map;
 
@@ -93,6 +89,9 @@ class flat_map {
   static_assert(!is_same_v<_KeyContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
   static_assert(!is_same_v<_MappedContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
 
+  template <bool _Const>
+  using __iterator = __key_value_iterator<flat_map, _KeyContainer, _MappedContainer, _Const>;
+
 public:
   // types
   using key_type               = _Key;
@@ -134,123 +133,6 @@ class flat_map {
 
   _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare, _Compare>;
 
-  template <bool _Const>
-  struct __iterator {
-  private:
-    using __key_iterator    = ranges::iterator_t<const key_container_type>;
-    using __mapped_iterator = ranges::iterator_t<__maybe_const<_Const, mapped_container_type>>;
-    using __reference       = pair<iter_reference_t<__key_iterator>, iter_reference_t<__mapped_iterator>>;
-
-    struct __arrow_proxy {
-      __reference __ref_;
-      _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); }
-    };
-
-    __key_iterator __key_iter_;
-    __mapped_iterator __mapped_iter_;
-
-    friend flat_map;
-
-  public:
-    using iterator_concept = random_access_iterator_tag;
-    // `flat_map::iterator` only satisfy "Cpp17InputIterator" named requirements, because
-    // its `reference` is not a reference type.
-    // However, to avoid surprising runtime behaviour when it is used with the
-    // Cpp17 algorithms or operations, iterator_category is set to random_access_iterator_tag.
-    using iterator_category = random_access_iterator_tag;
-    using value_type        = flat_map::value_type;
-    using difference_type   = flat_map::difference_type;
-
-    _LIBCPP_HIDE_FROM_ABI __iterator() = default;
-
-    _LIBCPP_HIDE_FROM_ABI __iterator(__iterator<!_Const> __i)
-      requires _Const && convertible_to<ranges::iterator_t<key_container_type>, __key_iterator> &&
-                   convertible_to<ranges::iterator_t<mapped_container_type>, __mapped_iterator>
-        : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {}
-
-    _LIBCPP_HIDE_FROM_ABI __iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter)
-        : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {}
-
-    _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); }
-    _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator& operator++() {
-      ++__key_iter_;
-      ++__mapped_iter_;
-      return *this;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator operator++(int) {
-      __iterator __tmp(*this);
-      ++*this;
-      return __tmp;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator& operator--() {
-      --__key_iter_;
-      --__mapped_iter_;
-      return *this;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator operator--(int) {
-      __iterator __tmp(*this);
-      --*this;
-      return __tmp;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator& operator+=(difference_type __x) {
-      __key_iter_ += __x;
-      __mapped_iter_ += __x;
-      return *this;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __iterator& operator-=(difference_type __x) {
-      __key_iter_ -= __x;
-      __mapped_iter_ -= __x;
-      return *this;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); }
-
-    _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) {
-      return __x.__key_iter_ == __y.__key_iter_;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __iterator& __x, const __iterator& __y) {
-      return __x.__key_iter_ < __y.__key_iter_;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __iterator& __x, const __iterator& __y) { return __y < __x; }
-
-    _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __iterator& __x, const __iterator& __y) { return !(__y < __x); }
-
-    _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __iterator& __x, const __iterator& __y) { return !(__x < __y); }
-
-    _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __iterator& __x, const __iterator& __y)
-      requires three_way_comparable<__key_iterator>
-    {
-      return __x.__key_iter_ <=> __y.__key_iter_;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(const __iterator& __i, difference_type __n) {
-      auto __tmp = __i;
-      __tmp += __n;
-      return __tmp;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(difference_type __n, const __iterator& __i) { return __i + __n; }
-
-    _LIBCPP_HIDE_FROM_ABI friend __iterator operator-(const __iterator& __i, difference_type __n) {
-      auto __tmp = __i;
-      __tmp -= __n;
-      return __tmp;
-    }
-
-    _LIBCPP_HIDE_FROM_ABI friend difference_type operator-(const __iterator& __x, const __iterator& __y) {
-      return difference_type(__x.__key_iter_ - __y.__key_iter_);
-    }
-  };
-
 public:
   // [flat.map.cons], construct/copy/destroy
   _LIBCPP_HIDE_FROM_ABI flat_map() noexcept(
@@ -1307,22 +1189,20 @@ template <ranges::input_range _Range,
           class _Compare   = less<__range_key_type<_Range>>,
           class _Allocator = allocator<byte>,
           class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
-flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
-    -> flat_map<
-        __range_key_type<_Range>,
-        __range_mapped_type<_Range>,
-        _Compare,
-        vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
-        vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_map<
+    __range_key_type<_Range>,
+    __range_mapped_type<_Range>,
+    _Compare,
+    vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+    vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
 template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
-flat_map(from_range_t, _Range&&, _Allocator)
-    -> flat_map<
-        __range_key_type<_Range>,
-        __range_mapped_type<_Range>,
-        less<__range_key_type<_Range>>,
-        vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
-        vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+flat_map(from_range_t, _Range&&, _Allocator) -> flat_map<
+    __range_key_type<_Range>,
+    __range_mapped_type<_Range>,
+    less<__range_key_type<_Range>>,
+    vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+    vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
 
 template <class _Key, class _Tp, class _Compare = less<_Key>>
   requires(!__is_allocator<_Compare>::value)
diff --git a/libcxx/include/__flat_map/key_value_iterator.h b/libcxx/include/__flat_map/key_value_iterator.h
new file mode 100644
index 00000000000000..a88ab1bbce32af
--- /dev/null
+++ b/libcxx/include/__flat_map/key_value_iterator.h
@@ -0,0 +1,170 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H
+#define _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H
+
+#include <__compare/three_way_comparable.h>
+#include <__concepts/convertible_to.h>
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__memory/addressof.h>
+#include <__ranges/access.h>
+#include <__type_traits/maybe_const.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Owner, class _KeyContainer, class _MappedContainer, bool _Const>
+struct __key_value_iterator {
+private:
+  using __key_iterator    = ranges::iterator_t<const _KeyContainer>;
+  using __mapped_iterator = ranges::iterator_t<__maybe_const<_Const, _MappedContainer>>;
+  using __reference       = pair<iter_reference_t<__key_iterator>, iter_reference_t<__mapped_iterator>>;
+
+  struct __arrow_proxy {
+    __reference __ref_;
+    _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); }
+  };
+
+  __key_iterator __key_iter_;
+  __mapped_iterator __mapped_iter_;
+
+  friend _Owner;
+
+  template <class, class, class, bool>
+  friend struct __key_value_iterator;
+
+public:
+  using iterator_concept = random_access_iterator_tag;
+  // `__key_value_iterator` only satisfies the "Cpp17InputIterator" named requirements, because
+  // its `reference` is not a reference type.
+  // However, to avoid surprising runtime behaviour when it is used with the
+  // Cpp17 algorithms or operations, iterator_category is set to random_access_iterator_tag.
+  using iterator_category = random_access_iterator_tag;
+  using value_type        = typename _Owner::value_type;
+  using difference_type   = typename _Owner::difference_type;
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator() = default;
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, !_Const> __i)
+    requires _Const && convertible_to<ranges::iterator_t<_KeyContainer>, __key_iterator> &&
+                 convertible_to<ranges::iterator_t<_MappedContainer>, __mapped_iterator>
+      : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {}
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter)
+      : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {}
+
+  _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); }
+  _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator++() {
+    ++__key_iter_;
+    ++__mapped_iter_;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator++(int) {
+    __key_value_iterator __tmp(*this);
+    ++*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator--() {
+    --__key_iter_;
+    --__mapped_iter_;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator--(int) {
+    __key_value_iterator __tmp(*this);
+    --*this;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator+=(difference_type __x) {
+    __key_iter_ += __x;
+    __mapped_iter_ += __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator-=(difference_type __x) {
+    __key_iter_ -= __x;
+    __mapped_iter_ -= __x;
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); }
+
+  _LIBCPP_HIDE_FROM_ABI friend constexpr bool
+  operator==(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return __x.__key_iter_ == __y.__key_iter_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return __x.__key_iter_ < __y.__key_iter_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return __y < __x;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return !(__y < __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return !(__x < __y);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __key_value_iterator& __x, const __key_value_iterator& __y)
+    requires three_way_comparable<__key_iterator>
+  {
+    return __x.__key_iter_ <=> __y.__key_iter_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(const __key_value_iterator& __i, difference_type __n) {
+    auto __tmp = __i;
+    __tmp += __n;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(difference_type __n, const __key_value_iterator& __i) {
+    return __i + __n;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator-(const __key_value_iterator& __i, difference_type __n) {
+    auto __tmp = __i;
+    __tmp -= __n;
+    return __tmp;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI friend difference_type
+  operator-(const __key_value_iterator& __x, const __key_value_iterator& __y) {
+    return difference_type(__x.__key_iter_ - __y.__key_iter_);
+  }
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H
diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map
index 15d79dd1ddca34..e96af677a7eed9 100644
--- a/libcxx/include/flat_map
+++ b/libcxx/include/flat_map
@@ -40,6 +40,7 @@ namespace std {
 #include <__assert> // all public C++ headers provide the assertion handler
 #include <__config>
 #include <__flat_map/flat_map.h>
+#include <__flat_map/key_value_iterator.h>
 #include <__flat_map/sorted_unique.h>
 #include <version>
 
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 4e06a68c6a6b61..52e13aebc2187c 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1229,6 +1229,7 @@ module std [system] {
 
   module flat_map {
     module flat_map                       { header "__flat_map/flat_map.h" }
+    module key_value_iterator             { header "__flat_map/key_value_iterator.h" }
     module sorted_unique                  { header "__flat_map/sorted_unique.h" }
 
     header "flat_map"
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp
index 1ce859f6c737ea..14189840ce6605 100644
--- a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp
@@ -23,6 +23,7 @@
 #include <flat_map>
 #include <random>
 #include <map>
+#include <vector>
 
 #include "test_macros.h"
 
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp
index 09e18986a7e813..fc3949d70745fc 100644
--- a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp
@@ -23,7 +23,7 @@
 #include <deque>
 #include <flat_map>
 #include <functional>
-#include <string>
+#include <vector>
 
 #include <iterator>
 
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp
index 81c590ba73a157..76461951f0d3d8 100644
--- a/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp
@@ -14,6 +14,7 @@
 // type.
 
 #include <flat_map>
+#include <vector>
 
 struct A {
   using Map = std::flat_map<A, A>;

>From 99baa5ca6172a1852cdb804104a986f3b378c7c5 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 23 Nov 2024 17:00:51 +0000
Subject: [PATCH 02/12] [AArch64][GlobalISel] Legalize ptr shuffle vector to
 s64 (#116013)

This converts all ptr element shuffle vectors to s64, so that the
existing vector legalization handling can lower them as needed. This
prevents a lot of fallbacks that currently try to generate things like
`<2 x ptr> G_EXT`.

I'm not sure if bitcast/inttoptr/ptrtoint is intended to be necessary
for vectors of pointers, but it uses buildCast for the casts, which now
generates a ptrtoint/inttoptr.
---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |  2 +
 .../llvm/CodeGen/GlobalISel/LegalizerInfo.h   |  3 +
 .../CodeGen/GlobalISel/LegalityPredicates.cpp |  6 ++
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    | 37 +++++++++
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |  7 +-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    | 15 +++-
 .../GlobalISel/legalize-shuffle-vector.mir    |  7 +-
 llvm/test/CodeGen/AArch64/arm64-ext.ll        |  4 +-
 llvm/test/CodeGen/AArch64/neon-perm.ll        |  9 +-
 .../test/CodeGen/AArch64/neon-vector-splat.ll | 18 ++--
 llvm/test/CodeGen/AArch64/shufflevector.ll    | 83 +++++++++++++------
 11 files changed, 138 insertions(+), 53 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index f682b20816d57f..2384b22c052662 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -378,6 +378,8 @@ class LegalizerHelper {
                                         LLT CastTy);
   LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
                                      LLT CastTy);
+  LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
+                                      LLT CastTy);
   LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
                                          LLT CastTy);
   LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 8b1c11a6f41301..b681a0708db4b9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -292,6 +292,9 @@ LegalityPredicate isPointer(unsigned TypeIdx);
 /// True iff the specified type index is a pointer with the specified address
 /// space.
 LegalityPredicate isPointer(unsigned TypeIdx, unsigned AddrSpace);
+/// True iff the specified type index is a vector of pointers (with any address
+/// space).
+LegalityPredicate isPointerVector(unsigned TypeIdx);
 
 /// True if the type index is a vector with element type \p EltTy
 LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT EltTy);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index b7541effafe5ce..30c2d089c31214 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -101,6 +101,12 @@ LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
   };
 }
 
+LegalityPredicate LegalityPredicates::isPointerVector(unsigned TypeIdx) {
+  return [=](const LegalityQuery &Query) {
+    return Query.Types[TypeIdx].isPointerVector();
+  };
+}
+
 LegalityPredicate LegalityPredicates::elementTypeIs(unsigned TypeIdx,
                                                     LLT EltTy) {
   return [=](const LegalityQuery &Query) {
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 062dbbe904de33..321760ef822bc2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
   return Legalized;
 }
 
+// This bitcasts a shuffle vector to a different type currently of the same
+// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
+// will be used instead.
+//
+// <4 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
+// ===>
+// <4 x s64> = G_PTRTOINT <4 x p0>
+// <4 x s64> = G_PTRTOINT <4 x p0>
+// <4 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
+// <4 x p0> = G_INTTOPTR <4 x s64>
+LegalizerHelper::LegalizeResult
+LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
+                                      LLT CastTy) {
+  auto ShuffleMI = cast<GShuffleVector>(&MI);
+  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
+  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
+
+  // We currently only handle vectors of the same size.
+  if (TypeIdx != 0 ||
+      CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
+      CastTy.getElementCount() != DstTy.getElementCount())
+    return UnableToLegalize;
+
+  LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
+
+  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
+  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
+  auto Shuf =
+      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
+  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 /// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
 ///
 ///  <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
@@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
   case TargetOpcode::G_CONCAT_VECTORS:
     return bitcastConcatVector(MI, TypeIdx, CastTy);
+  case TargetOpcode::G_SHUFFLE_VECTOR:
+    return bitcastShuffleVector(MI, TypeIdx, CastTy);
   case TargetOpcode::G_EXTRACT_SUBVECTOR:
     return bitcastExtractSubvector(MI, TypeIdx, CastTy);
   case TargetOpcode::G_INSERT_SUBVECTOR:
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index d910e33ac40f65..be347006a81f92 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
     return buildCopy(Dst, Src);
 
   unsigned Opcode;
-  if (SrcTy.isPointer() && DstTy.isScalar())
+  if (SrcTy.isPointerOrPointerVector())
     Opcode = TargetOpcode::G_PTRTOINT;
-  else if (DstTy.isPointer() && SrcTy.isScalar())
+  else if (DstTy.isPointerOrPointerVector())
     Opcode = TargetOpcode::G_INTTOPTR;
   else {
-    assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
+    assert(!SrcTy.isPointerOrPointerVector() &&
+           !DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
     Opcode = TargetOpcode::G_BITCAST;
   }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index c8f01068f72189..9c1bbafd337e93 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -840,13 +840,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_PTRTOINT)
       .legalFor({{s64, p0}, {v2s64, v2p0}})
       .widenScalarToNextPow2(0, 64)
-      .clampScalar(0, s64, s64);
+      .clampScalar(0, s64, s64)
+      .clampMaxNumElements(0, s64, 2);
 
   getActionDefinitionsBuilder(G_INTTOPTR)
       .unsupportedIf([&](const LegalityQuery &Query) {
         return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
       })
-      .legalFor({{p0, s64}, {v2p0, v2s64}});
+      .legalFor({{p0, s64}, {v2p0, v2s64}})
+      .clampMaxNumElements(1, s64, 2);
 
   // Casts for 32 and 64-bit width type are just copies.
   // Same for 128-bit width type, except they are on the FPR bank.
@@ -1053,7 +1055,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
         if (DstTy != SrcTy)
           return false;
         return llvm::is_contained(
-            {v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
+            {v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
       })
       // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
       // just want those lowered into G_BUILD_VECTOR
@@ -1079,7 +1081,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v8s8, v16s8)
       .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v4s32, v4s32)
-      .clampNumElements(0, v2s64, v2s64);
+      .clampNumElements(0, v2s64, v2s64)
+      .bitcastIf(isPointerVector(0), [=](const LegalityQuery &Query) {
+        // Bitcast pointers vector to i64.
+        const LLT DstTy = Query.Types[0];
+        return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
+      });
 
   getActionDefinitionsBuilder(G_CONCAT_VECTORS)
       .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index c92718f9e9b3c7..2464026aa125b5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -59,8 +59,11 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
-    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
-    ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
+    ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
+    ; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
+    ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
+    ; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(<2 x p0>) = COPY $q0
     %1:_(<2 x p0>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll
index e32d83327fe424..50df6a0388587b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for test_v2p0
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-LABEL: test_vextd:
diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll
index def0f15790a9ba..7218204ba844ca 100644
--- a/llvm/test/CodeGen/AArch64/neon-perm.ll
+++ b/llvm/test/CodeGen/AArch64/neon-perm.ll
@@ -1,13 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for test_vuzp1q_p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_vuzp2q_p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_vzip1q_p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_vzip2q_p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_vtrn1q_p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_vtrn2q_p0
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
diff --git a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll
index 489eaf179a1bd2..d3846cab46f55d 100644
--- a/llvm/test/CodeGen/AArch64/neon-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/neon-vector-splat.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for shuffle8
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <2 x i32> @shuffle(ptr %P) {
 ; CHECK-SD-LABEL: shuffle:
@@ -116,10 +114,16 @@ define <2 x i64> @shuffle7(ptr %P) {
 }
 
 define <2 x ptr> @shuffle8(ptr %P) {
-; CHECK-LABEL: shuffle8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld1r { v0.2d }, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shuffle8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ld1r { v0.2d }, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shuffle8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q0, [x0]
+; CHECK-GI-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT:    ret
   %lv2ptr = load <2 x ptr>, ptr %P
   %sv2ptr = shufflevector <2 x ptr> %lv2ptr, <2 x ptr> undef, <2 x i32> zeroinitializer
   ret <2 x ptr> %sv2ptr
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 69d3174581e3ef..0f5b240e387ed0 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -1,11 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI:       warning: Instruction selection used fallback path for shufflevector_v2p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shufflevector_v4p0
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; ===== Legal Vector Types =====
 
@@ -392,13 +387,49 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
     ret <4 x i64> %c
 }
 
+define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
+; CHECK-SD-LABEL: shufflevector_v3p0:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov d2, d5
+; CHECK-SD-NEXT:    fmov d0, d1
+; CHECK-SD-NEXT:    fmov d1, d3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shufflevector_v3p0:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d3
+; CHECK-GI-NEXT:    mov v0.d[0], x8
+; CHECK-GI-NEXT:    mov v2.d[0], x9
+; CHECK-GI-NEXT:    fmov x8, d1
+; CHECK-GI-NEXT:    fmov x9, d4
+; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    mov v2.d[1], x9
+; CHECK-GI-NEXT:    fmov x8, d5
+; CHECK-GI-NEXT:    mov v1.d[0], x8
+; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v2.16b, #8
+; CHECK-GI-NEXT:    fmov x10, d1
+; CHECK-GI-NEXT:    mov d2, v0.d[1]
+; CHECK-GI-NEXT:    fmov d1, d2
+; CHECK-GI-NEXT:    fmov d2, x10
+; CHECK-GI-NEXT:    ret
+    %c = shufflevector <3 x ptr> %a, <3 x ptr> %b, <3 x i32> <i32 1, i32 3, i32 5>
+    ret <3 x ptr> %c
+}
+
 define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
-; CHECK-LABEL: shufflevector_v4p0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip2 v2.2d, v2.2d, v3.2d
-; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    mov v1.16b, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shufflevector_v4p0:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    zip2 v2.2d, v2.2d, v3.2d
+; CHECK-SD-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shufflevector_v4p0:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    zip2 v1.2d, v2.2d, v3.2d
+; CHECK-GI-NEXT:    ret
     %c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     ret <4 x ptr> %c
 }
@@ -549,13 +580,13 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fmov s0, w0
 ; CHECK-GI-NEXT:    fmov s1, w3
-; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
 ; CHECK-GI-NEXT:    mov v0.b[1], w1
 ; CHECK-GI-NEXT:    mov v1.b[1], w4
 ; CHECK-GI-NEXT:    mov v0.b[2], w2
 ; CHECK-GI-NEXT:    mov v1.b[2], w5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
 ; CHECK-GI-NEXT:    umov w0, v0.b[0]
 ; CHECK-GI-NEXT:    umov w1, v0.b[1]
@@ -570,9 +601,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    adrp x8, .LCPI35_0
+; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
+; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
@@ -580,9 +611,9 @@ define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
@@ -601,9 +632,9 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
+; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI37_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
@@ -614,18 +645,18 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
 define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
 ; CHECK-SD-LABEL: shufflevector_v7i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    adrp x8, .LCPI37_0
+; CHECK-SD-NEXT:    adrp x8, .LCPI38_0
 ; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI37_0]
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
 ; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v7i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
+; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
 ; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI37_0]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
 ; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
@@ -642,9 +673,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: shufflevector_v3i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
+; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
 ; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI39_0]
 ; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret

>From 682421683943909702ebe820bcf3bac929bbdbe4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 23 Nov 2024 09:21:51 -0800
Subject: [PATCH 03/12] AMDGPU: Stop running assembler tests with default cpu
 (#117421)

It does not make sense to assemble for the default target.
Add a test that shows the current behavior: the default CPU is
treated as a tahiti alias, but without the instructions that were
later removed, and it needs to be treated as wave64. We should
probably turn this into a hard error instead.
---
 llvm/test/MC/AMDGPU/ds.s                           |  2 --
 .../MC/AMDGPU/invalid-instructions-spellcheck.s    |  2 +-
 llvm/test/MC/AMDGPU/literals.s                     |  2 --
 llvm/test/MC/AMDGPU/mimg-err.s                     |  1 -
 llvm/test/MC/AMDGPU/mimg.s                         |  2 --
 llvm/test/MC/AMDGPU/regression/bug28165.s          |  1 -
 llvm/test/MC/AMDGPU/regression/bug28413.s          |  1 -
 llvm/test/MC/AMDGPU/smrd.s                         |  2 --
 llvm/test/MC/AMDGPU/sopk.s                         |  2 --
 llvm/test/MC/AMDGPU/unknown-target-cpu.s           | 14 ++++++++++++++
 llvm/test/MC/AMDGPU/vintrp.s                       |  2 +-
 llvm/test/MC/AMDGPU/vop1.s                         |  2 --
 llvm/test/MC/AMDGPU/vop2.s                         |  2 --
 llvm/test/MC/AMDGPU/vop3-convert.s                 |  2 --
 llvm/test/MC/AMDGPU/vop3-errs.s                    |  1 -
 llvm/test/MC/AMDGPU/vop3.s                         |  4 ++--
 llvm/test/MC/AMDGPU/vop_dpp.s                      |  1 -
 llvm/test/MC/AMDGPU/vop_sdwa.s                     |  1 -
 llvm/test/MC/AMDGPU/vopc.s                         |  1 -
 19 files changed, 18 insertions(+), 27 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/unknown-target-cpu.s

diff --git a/llvm/test/MC/AMDGPU/ds.s b/llvm/test/MC/AMDGPU/ds.s
index fd436fe9fe0ddb..bb1840eb849dfe 100644
--- a/llvm/test/MC/AMDGPU/ds.s
+++ b/llvm/test/MC/AMDGPU/ds.s
@@ -1,9 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti  -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire  -show-encoding %s | FileCheck %s --check-prefixes=CI,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 
diff --git a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s
index a5cca6ba5bd936..79ab8666234a2f 100644
--- a/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s
+++ b/llvm/test/MC/AMDGPU/invalid-instructions-spellcheck.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple amdgcn < %s 2>&1 | FileCheck --strict-whitespace %s
+# RUN: not llvm-mc -triple amdgcn -mcpu=tahiti < %s 2>&1 | FileCheck --strict-whitespace %s
 
 # This tests the mnemonic spell checker.
 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 7b3bd5ece09884..783947544d2212 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -1,10 +1,8 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/mimg-err.s b/llvm/test/MC/AMDGPU/mimg-err.s
index 6cf92f29c27b78..bec33bab984ab3 100644
--- a/llvm/test/MC/AMDGPU/mimg-err.s
+++ b/llvm/test/MC/AMDGPU/mimg-err.s
@@ -1,4 +1,3 @@
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOGCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=NOGFX9 --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/mimg.s b/llvm/test/MC/AMDGPU/mimg.s
index 29e402d9496f16..54bb2b19b2e844 100644
--- a/llvm/test/MC/AMDGPU/mimg.s
+++ b/llvm/test/MC/AMDGPU/mimg.s
@@ -1,11 +1,9 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI --check-prefix=SICIVI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck %s --check-prefix=GCN  --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_0
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=GCN  --check-prefix=SICIVI --check-prefix=VI --check-prefix=GFX89 --check-prefix=GFX8_1
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX9 --check-prefix=GFX89
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=NOVI --check-prefix=NOGFX8_0 --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/regression/bug28165.s b/llvm/test/MC/AMDGPU/regression/bug28165.s
index 1e31f204e8995f..6d04e13316b610 100644
--- a/llvm/test/MC/AMDGPU/regression/bug28165.s
+++ b/llvm/test/MC/AMDGPU/regression/bug28165.s
@@ -1,4 +1,3 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI
diff --git a/llvm/test/MC/AMDGPU/regression/bug28413.s b/llvm/test/MC/AMDGPU/regression/bug28413.s
index 5fbf9f37d4a8de..7cf413d2d0a17c 100644
--- a/llvm/test/MC/AMDGPU/regression/bug28413.s
+++ b/llvm/test/MC/AMDGPU/regression/bug28413.s
@@ -1,4 +1,3 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI
diff --git a/llvm/test/MC/AMDGPU/smrd.s b/llvm/test/MC/AMDGPU/smrd.s
index b877bce22af56d..12e01321b967a4 100644
--- a/llvm/test/MC/AMDGPU/smrd.s
+++ b/llvm/test/MC/AMDGPU/smrd.s
@@ -1,9 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN  %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefix=GCN %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefixes=GCN,CI %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefix=VI %s
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti  %s 2>&1 | FileCheck %s --check-prefix=NOSI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji  %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
 
diff --git a/llvm/test/MC/AMDGPU/sopk.s b/llvm/test/MC/AMDGPU/sopk.s
index c912b83ca61c27..59c93fefcfaa23 100644
--- a/llvm/test/MC/AMDGPU/sopk.s
+++ b/llvm/test/MC/AMDGPU/sopk.s
@@ -1,11 +1,9 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck --check-prefixes=GCN,SICI %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,VI %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefixes=GCN,VI9,GFX9 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GCN,GFX10 %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX11 %s
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=NOSICIVI --implicit-check-not=error: %s
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck --check-prefix=NOGFX9 --implicit-check-not=error: %s
diff --git a/llvm/test/MC/AMDGPU/unknown-target-cpu.s b/llvm/test/MC/AMDGPU/unknown-target-cpu.s
new file mode 100644
index 00000000000000..b671770084a1fc
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/unknown-target-cpu.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding < %s | FileCheck %s
+
+// CHECK: v_cmp_lt_f32_e32 vcc, s2, v4            ; encoding: [0x02,0x08,0x02,0x7c]
+v_cmp_lt_f32 vcc, s2, v4
+
+// CHECK: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x00]
+v_cndmask_b32 v1, v2, v3, vcc
+
+// CHECK: v_mac_legacy_f32_e64 v1, v3, s5         ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+v_mac_legacy_f32 v1, v3, s5
+
+// CHECK: v_lshr_b32_e32 v0, v1, v2               ; encoding: [0x01,0x05,0x00,0x2a]
+v_lshr_b32 v0, v1, v2
diff --git a/llvm/test/MC/AMDGPU/vintrp.s b/llvm/test/MC/AMDGPU/vintrp.s
index db15f8eb4499dd..35720c95cf31e7 100644
--- a/llvm/test/MC/AMDGPU/vintrp.s
+++ b/llvm/test/MC/AMDGPU/vintrp.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck -check-prefix=SI %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=SI %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s
 
 v_interp_p1_f32 v1, v0, attr0.x
diff --git a/llvm/test/MC/AMDGPU/vop1.s b/llvm/test/MC/AMDGPU/vop1.s
index f7e5db7fa3d39f..af0d289e827eed 100644
--- a/llvm/test/MC/AMDGPU/vop1.s
+++ b/llvm/test/MC/AMDGPU/vop1.s
@@ -1,9 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,CI,SICI,CIVI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,CIVI,VI
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s
index ade7ce95f17584..7317ab00ad7820 100644
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -1,9 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=VI
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vop3-convert.s b/llvm/test/MC/AMDGPU/vop3-convert.s
index 0f33a81c6ea0fe..02d576fdcd845b 100644
--- a/llvm/test/MC/AMDGPU/vop3-convert.s
+++ b/llvm/test/MC/AMDGPU/vop3-convert.s
@@ -1,9 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=GCN,SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GCN,VI
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s -check-prefix=NOVI --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vop3-errs.s b/llvm/test/MC/AMDGPU/vop3-errs.s
index e600151410389d..94fc0ea8b3e9eb 100644
--- a/llvm/test/MC/AMDGPU/vop3-errs.s
+++ b/llvm/test/MC/AMDGPU/vop3-errs.s
@@ -1,4 +1,3 @@
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=GFX67 --check-prefix=GCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=fiji %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefix=GFX89 --check-prefix=GCN --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s
index 0d2544002a9f21..ccae2611d4ffdd 100644
--- a/llvm/test/MC/AMDGPU/vop3.s
+++ b/llvm/test/MC/AMDGPU/vop3.s
@@ -1,11 +1,11 @@
-// RUN: not llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI --check-prefix=SICI
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
 
 // Make sure interp instructions disassemble regardless of lds bank count
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=hawaii %s 2>&1 | FileCheck %s -check-prefix=NOCI --check-prefix=NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx810 %s 2>&1 | FileCheck -check-prefix=NOVI --implicit-check-not=error: %s
diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s
index a15a48e507a627..c7cfb7ae67a979 100644
--- a/llvm/test/MC/AMDGPU/vop_dpp.s
+++ b/llvm/test/MC/AMDGPU/vop_dpp.s
@@ -1,7 +1,6 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,VI9
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,VI9
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOSICI,NOCI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefix=NOVI --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vop_sdwa.s b/llvm/test/MC/AMDGPU/vop_sdwa.s
index 0c803a9819a831..0e007d5e360a31 100644
--- a/llvm/test/MC/AMDGPU/vop_sdwa.s
+++ b/llvm/test/MC/AMDGPU/vop_sdwa.s
@@ -1,7 +1,6 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=VI,GFX89
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX9,GFX89
 
-// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOSI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOCI,NOSICI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOVI,NOGFX89 --implicit-check-not=error:
diff --git a/llvm/test/MC/AMDGPU/vopc.s b/llvm/test/MC/AMDGPU/vopc.s
index 55289c0a463fa9..9ff4f7eda73a0a 100644
--- a/llvm/test/MC/AMDGPU/vopc.s
+++ b/llvm/test/MC/AMDGPU/vopc.s
@@ -1,4 +1,3 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI
 // RUN: llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
 

>From 0da6677ac3cccddab7a0de20f06680020125ed46 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 23 Nov 2024 09:24:44 -0800
Subject: [PATCH 04/12] AMDGPU: Move default wavesize hack for disassembler
 (#117422)

The disassembler's subtarget cannot be adjusted after construction:
llvm-mc passes around the originally constructed MCSubtargetInfo
rather than querying the copy held by the disassembler instance.
---
 .../Disassembler/AMDGPUDisassembler.cpp       | 20 ++-----------------
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp       | 17 +++++++++++++++-
 llvm/test/MC/AMDGPU/unknown-target-cpu.s      |  5 +++--
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index f90121a86c846c..7817c5ff5acc0a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,26 +45,10 @@ using namespace llvm;
 
 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
 
-static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
-                                                 MCContext &Ctx) {
-  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
-      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
-    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
-    // If there is no default wave size it must be a generation before gfx10,
-    // these have FeatureWavefrontSize64 in their definition already. For gfx10+
-    // set wave32 as a default.
-    STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
-    return STICopy;
-  }
-
-  return STI;
-}
-
 AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                        MCContext &Ctx, MCInstrInfo const *MCII)
-    : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
-      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
-      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
       CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
   // ToDo: AMDGPUDisassembler supports only VI ISA.
   if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 29be64625811f7..c692895d84c002 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -77,7 +77,22 @@ static MCSubtargetInfo *
 createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   if (TT.getArch() == Triple::r600)
     return createR600MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
-  return createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+
+  MCSubtargetInfo *STI =
+      createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+
+  // FIXME: We should error for the default target.
+  if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+      !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+    // If there is no default wave size it must be a generation before gfx10,
+    // these have FeatureWavefrontSize64 in their definition already. For gfx10+
+    // set wave32 as a default.
+    STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
+                           ? AMDGPU::FeatureWavefrontSize32
+                           : AMDGPU::FeatureWavefrontSize64);
+  }
+
+  return STI;
 }
 
 static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
diff --git a/llvm/test/MC/AMDGPU/unknown-target-cpu.s b/llvm/test/MC/AMDGPU/unknown-target-cpu.s
index b671770084a1fc..3d41e8eb5b2c45 100644
--- a/llvm/test/MC/AMDGPU/unknown-target-cpu.s
+++ b/llvm/test/MC/AMDGPU/unknown-target-cpu.s
@@ -1,4 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn -show-encoding < %s | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn -show-encoding -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding < %s | FileCheck %s
 
 // CHECK: v_cmp_lt_f32_e32 vcc, s2, v4            ; encoding: [0x02,0x08,0x02,0x7c]
@@ -7,7 +8,7 @@ v_cmp_lt_f32 vcc, s2, v4
 // CHECK: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x00]
 v_cndmask_b32 v1, v2, v3, vcc
 
-// CHECK: v_mac_legacy_f32_e64 v1, v3, s5         ; encoding: [0x01,0x00,0x0c,0xd2,0x03,0x0b,0x00,0x00]
+// ERR: [[@LINE+1]]:1: error: instruction not supported on this GPU
 v_mac_legacy_f32 v1, v3, s5
 
 // CHECK: v_lshr_b32_e32 v0, v1, v2               ; encoding: [0x01,0x05,0x00,0x2a]

>From 1a224705f2a7fb6978d56a28a83e1fb8be1be0bd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 23 Nov 2024 09:27:47 -0800
Subject: [PATCH 05/12] AMDGPU: Remove wavefrontsize64 feature from dummy
 target (#117410)

This is a refinement for the existing hack. With this,
the default target will have neither wavefrontsize feature
present, unless it was explicitly specified. That is,
getWavefrontSize() == 64 no longer implies +wavefrontsize64.
getWavefrontSize() == 32 does imply +wavefrontsize32.

Continue to assume the value is 64 with no wavesize feature.
This maintains the codegenable property without any code
that directly cares about the wavesize needing to worry about it.

Introduce an isWaveSizeKnown helper to check if we know the
wavesize is accurate based on having one of the features explicitly
set, or a known target-cpu.

I'm not sure what's going on in wave_any.s. It's testing what
happens when both wavesizes are enabled, but this is treated
as an error in codegen. We now treat wave32 as the winning
case, so some cases that were previously printed as vcc are now
vcc_lo.
---
 .../Disassembler/AMDGPUDisassembler.cpp       | 16 +++----
 llvm/lib/Target/AMDGPU/GCNProcessors.td       |  4 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.cpp       | 16 +++----
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |  8 ++++
 .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp |  6 +--
 llvm/test/MC/AMDGPU/wave_any.s                | 48 +++++++++----------
 6 files changed, 52 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 7817c5ff5acc0a..136fe2e3f90d02 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1826,28 +1826,28 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
           STI.hasFeature(AMDGPU::FeatureGFX10)) &&
          "SDWAVopcDst should be present only on GFX9+");
 
-  bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
+  bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
 
   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
 
     int TTmpIdx = getTTmpIdx(Val);
     if (TTmpIdx >= 0) {
-      auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
+      auto TTmpClsId = getTtmpClassId(IsWave32 ? OPW32 : OPW64);
       return createSRegOperand(TTmpClsId, TTmpIdx);
     }
     if (Val > SGPR_MAX) {
-      return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
+      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
     }
-    return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
+    return createSRegOperand(getSgprClassId(IsWave32 ? OPW32 : OPW64), Val);
   }
-  return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
+  return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
 }
 
 MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
-  return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
-             ? decodeSrcOp(OPW64, Val)
-             : decodeSrcOp(OPW32, Val);
+  return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
+             ? decodeSrcOp(OPW32, Val)
+             : decodeSrcOp(OPW64, Val);
 }
 
 MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 508f2dd83108d9..a86c76bb6075e8 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -9,11 +9,11 @@
 // The code produced for "generic" is only useful for tests and cannot
 // reasonably be expected to execute on any particular target.
 def : ProcessorModel<"generic", NoSchedModel,
-  [FeatureWavefrontSize64, FeatureGDS, FeatureGWS]
+  [FeatureGDS, FeatureGWS]
 >;
 
 def : ProcessorModel<"generic-hsa", NoSchedModel,
-  [FeatureWavefrontSize64, FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
+  [FeatureGDS, FeatureGWS, FeatureFlatAddressSpace]
 >;
 
 //===------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 6233ca2eb4f1dd..51361b75940560 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -100,14 +100,16 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
   if (Gen == AMDGPUSubtarget::INVALID) {
     Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS
                                        : AMDGPUSubtarget::SOUTHERN_ISLANDS;
-  }
-
-  if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
-      !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+    // Assume wave64 for the unknown target, if not explicitly set.
+    if (getWavefrontSizeLog2() == 0)
+      WavefrontSizeLog2 = 6;
+  } else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+             !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
     // If there is no default wave size it must be a generation before gfx10,
     // these have FeatureWavefrontSize64 in their definition already. For gfx10+
     // set wave32 as a default.
     ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    WavefrontSizeLog2 = getGeneration() >= AMDGPUSubtarget::GFX10 ? 5 : 6;
   }
 
   // We don't support FP64 for EG/NI atm.
@@ -147,10 +149,6 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
       !getFeatureBits().test(AMDGPU::FeatureCuMode))
     LocalMemorySize *= 2;
 
-  // Don't crash on invalid devices.
-  if (WavefrontSizeLog2 == 0)
-    WavefrontSizeLog2 = 5;
-
   HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
   HasSMulHi = getGeneration() >= AMDGPUSubtarget::GFX9;
 
@@ -166,7 +164,7 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
 void GCNSubtarget::checkSubtargetFeatures(const Function &F) const {
   LLVMContext &Ctx = F.getContext();
-  if (hasFeature(AMDGPU::FeatureWavefrontSize32) ==
+  if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
       hasFeature(AMDGPU::FeatureWavefrontSize64)) {
     Ctx.diagnose(DiagnosticInfoUnsupported(
         F, "must specify exactly one of wavefrontsize32 and wavefrontsize64"));
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index c7022ce78650ea..18219174b16b1e 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1569,6 +1569,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return getWavefrontSize() == 64;
   }
 
+  /// Returns if the wavesize of this subtarget is known reliable. This is false
+  /// only for a default target-cpu that does not have an explicit
+  /// +wavefrontsize target feature.
+  bool isWaveSizeKnown() const {
+    return hasFeature(AMDGPU::FeatureWavefrontSize32) ||
+           hasFeature(AMDGPU::FeatureWavefrontSize64);
+  }
+
   const TargetRegisterClass *getBoolRC() const {
     return getRegisterInfo()->getBoolRC();
   }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 344028c4b48689..e21aa70c9859a0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -649,9 +649,9 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
                                                raw_ostream &O) {
   if (!FirstOperand)
     O << ", ";
-  printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
-                      ? AMDGPU::VCC
-                      : AMDGPU::VCC_LO,
+  printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
+                      ? AMDGPU::VCC_LO
+                      : AMDGPU::VCC,
                   O, MRI);
   if (FirstOperand)
     O << ", ";
diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s
index 825a0abc172240..27502eff89bfc8 100644
--- a/llvm/test/MC/AMDGPU/wave_any.s
+++ b/llvm/test/MC/AMDGPU/wave_any.s
@@ -1,13 +1,13 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
 
 v_cmp_ge_i32_e32 s0, v0
-// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc_lo, s0, v1
-// GFX10: v_cmp_ge_i32_e32 vcc, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc, s0, v2
-// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
 
 v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
@@ -16,10 +16,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
 
 v_cmp_class_f32_e32 vcc_lo, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f32_e32 vcc, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
@@ -34,40 +34,40 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
 
 v_cndmask_b32_e32 v1, v2, v3,
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc_lo
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_add_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_sub_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_subrev_co_ci_u32_e32 v1, 0, v1
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
@@ -76,7 +76,7 @@ v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
@@ -85,7 +85,7 @@ v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD sr
 // GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
+// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
@@ -94,10 +94,10 @@ v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD
 // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
+// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
@@ -106,7 +106,7 @@ v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:U
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
+// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
@@ -189,8 +189,8 @@ v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
 // GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
-v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
-// GFX10: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
+v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3]
+// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
 // GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]

>From ebb9e4b45f8bd480096400b224e0fad827ab0f7e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 23 Nov 2024 09:30:57 -0800
Subject: [PATCH 06/12] AMDGPU: Use isWave[32|64] instead of comparing size
 value (#117411)

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++--
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp    | 6 +++---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td     | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e520dfff1016b2..73ca59fe320d27 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16677,8 +16677,8 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
   const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
   const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
   if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
-    return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
-                                               : &AMDGPU::SReg_32RegClass;
+    return Subtarget->isWave64() ? &AMDGPU::SReg_64RegClass
+                                 : &AMDGPU::SReg_32RegClass;
   if (!TRI->isSGPRClass(RC) && !isDivergent)
     return TRI->getEquivalentSGPRClass(RC);
   if (TRI->isSGPRClass(RC) && isDivergent)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2c30bfcb5522a8..4a94d690297949 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7703,8 +7703,8 @@ void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
       // Insert a trivial select instead of creating a copy, because a copy from
       // SCC would semantically mean just copying a single bit, but we may need
       // the result to be a vector condition mask that needs preserving.
-      unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
-                                                      : AMDGPU::S_CSELECT_B32;
+      unsigned Opcode =
+          ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
       auto NewSelect =
           BuildMI(MBB, MII, DL, get(Opcode), NewCondReg).addImm(-1).addImm(0);
       NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
@@ -8716,7 +8716,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
   }
 
   // IndexStride = 64 / 32.
-  uint64_t IndexStride = ST.getWavefrontSize() == 64 ? 3 : 2;
+  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
   Rsrc23 |= IndexStride << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
 
   // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 67d9e4138753f7..84cb1e48772cae 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
+def isWave32 : Predicate<"Subtarget->isWave32()">,
   AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
-def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
+def isWave64 : Predicate<"Subtarget->isWave64()">,
   AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
 
 class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">

>From 71f00daec7018d5a746eeb386f21b43d3bd038a1 Mon Sep 17 00:00:00 2001
From: Helena Kotas <hekotas at microsoft.com>
Date: Sat, 23 Nov 2024 09:33:38 -0800
Subject: [PATCH 07/12] [HLSL] Add `Increment`/`DecrementCounter` methods to
 structured buffers (#114148)

Introduces the `__builtin_hlsl_buffer_update_counter` clang builtin that is
used to implement the `IncrementCounter` and `DecrementCounter` methods
on `RWStructuredBuffer` and `RasterizerOrderedStructuredBuffer` (see
Note).

The builtin is translated to the LLVM intrinsic `llvm.dx.bufferUpdateCounter`
or `llvm.spv.bufferUpdateCounter`.

Introduces `BuiltinTypeMethodBuilder` helper in `HLSLExternalSemaSource`
that enables adding methods to builtin types using builder pattern like
this:
```
   BuiltinTypeMethodBuilder(Sema, RecordBuilder, "MethodName", ReturnType)
       .addParam("param_name", Type, InOutModifier)
       .callBuiltin("buildin_name", { BuiltinParams })
       .finalizeMethod();
```

Fixes #113513
---
 clang/include/clang/Basic/Builtins.td         |   7 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   6 +
 clang/lib/CodeGen/CGBuiltin.cpp               |   9 +
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 clang/lib/Sema/HLSLExternalSemaSource.cpp     | 414 ++++++++++++++----
 clang/lib/Sema/SemaExpr.cpp                   |   3 +
 clang/lib/Sema/SemaHLSL.cpp                   |  54 +++
 .../test/AST/HLSL/RWStructuredBuffer-AST.hlsl |  26 ++
 .../StructuredBuffers-methods-lib.hlsl        |  25 ++
 .../StructuredBuffers-methods-ps.hlsl         |  28 ++
 .../buffer_update_counter-errors.hlsl         |  48 ++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   2 +-
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   5 +
 llvm/lib/Target/DirectX/DXILOpLowering.cpp    |   2 +-
 14 files changed, 534 insertions(+), 96 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 83c90b3d6e681b..eaff744924805e 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4882,7 +4882,6 @@ def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
-
 def HLSLSelect : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_select"];
   let Attributes = [NoThrow, Const];
@@ -4907,6 +4906,12 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLBufferUpdateCounter : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_buffer_update_counter"];
+  let Attributes = [NoThrow];
+  let Prototype = "uint32_t(...)";
+}
+
 def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eb05a6a77978af..f21b125252b063 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7287,6 +7287,8 @@ def err_typecheck_illegal_increment_decrement : Error<
   "cannot %select{decrement|increment}1 value of type %0">;
 def err_typecheck_expect_int : Error<
   "used type %0 where integer is required">;
+def err_typecheck_expect_hlsl_resource : Error<
+  "used type %0 where __hlsl_resource_t is required">;
 def err_typecheck_arithmetic_incomplete_or_sizeless_type : Error<
   "arithmetic on a pointer to %select{an incomplete|sizeless}0 type %1">;
 def err_typecheck_pointer_arith_function_type : Error<
@@ -12528,6 +12530,10 @@ def warn_attr_min_eq_max:  Warning<
 
 def err_hlsl_attribute_number_arguments_insufficient_shader_model: Error<
   "attribute %0 with %1 arguments requires shader model %2 or greater">;
+def err_hlsl_expect_arg_const_int_one_or_neg_one: Error<
+  "argument %0 must be constant integer 1 or -1">;
+def err_invalid_hlsl_resource_type: Error<
+  "invalid __hlsl_resource_t type attributes">;
 
 // Layout randomization diagnostics.
 def err_non_designated_init_used : Error<
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8f754953d28998..cde03a92b02853 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19409,6 +19409,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.radians");
   }
+  case Builtin::BI__builtin_hlsl_buffer_update_counter: {
+    Value *ResHandle = EmitScalarExpr(E->getArg(0));
+    Value *Offset = EmitScalarExpr(E->getArg(1));
+    Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Offset->getType(),
+        CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
+        ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
 
     assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index a8e0ed42b79a35..854214d6bc0677 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -102,6 +102,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
 
   GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, bufferUpdateCounter)
 
   //===----------------------------------------------------------------------===//
   // End of reserved area for HLSL intrinsic getters.
diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp
index 822202fd81dc89..fcc74a2e8e71b3 100644
--- a/clang/lib/Sema/HLSLExternalSemaSource.cpp
+++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp
@@ -12,7 +12,9 @@
 #include "clang/Sema/HLSLExternalSemaSource.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Sema/Lookup.h"
@@ -20,36 +22,43 @@
 #include "clang/Sema/SemaHLSL.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/HLSL/HLSLResource.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #include <functional>
 
 using namespace clang;
 using namespace llvm::hlsl;
 
+static FunctionDecl *lookupBuiltinFunction(Sema &S, StringRef Name);
+
 namespace {
 
 struct TemplateParameterListBuilder;
 
 struct BuiltinTypeDeclBuilder {
+  Sema &SemaRef;
   CXXRecordDecl *Record = nullptr;
   ClassTemplateDecl *Template = nullptr;
   ClassTemplateDecl *PrevTemplate = nullptr;
   NamespaceDecl *HLSLNamespace = nullptr;
   llvm::StringMap<FieldDecl *> Fields;
 
-  BuiltinTypeDeclBuilder(CXXRecordDecl *R) : Record(R) {
+  BuiltinTypeDeclBuilder(Sema &SemaRef, CXXRecordDecl *R)
+      : SemaRef(SemaRef), Record(R) {
     Record->startDefinition();
     Template = Record->getDescribedClassTemplate();
   }
 
-  BuiltinTypeDeclBuilder(Sema &S, NamespaceDecl *Namespace, StringRef Name)
-      : HLSLNamespace(Namespace) {
-    ASTContext &AST = S.getASTContext();
+  BuiltinTypeDeclBuilder(Sema &SemaRef, NamespaceDecl *Namespace,
+                         StringRef Name)
+      : SemaRef(SemaRef), HLSLNamespace(Namespace) {
+    ASTContext &AST = SemaRef.getASTContext();
     IdentifierInfo &II = AST.Idents.get(Name, tok::TokenKind::identifier);
 
-    LookupResult Result(S, &II, SourceLocation(), Sema::LookupTagName);
+    LookupResult Result(SemaRef, &II, SourceLocation(), Sema::LookupTagName);
     CXXRecordDecl *PrevDecl = nullptr;
-    if (S.LookupQualifiedName(Result, HLSLNamespace)) {
+    if (SemaRef.LookupQualifiedName(Result, HLSLNamespace)) {
+      // Declaration already exists (from precompiled headers)
       NamedDecl *Found = Result.getFoundDecl();
       if (auto *TD = dyn_cast<ClassTemplateDecl>(Found)) {
         PrevDecl = TD->getTemplatedDecl();
@@ -61,6 +70,7 @@ struct BuiltinTypeDeclBuilder {
 
     if (PrevDecl && PrevDecl->isCompleteDefinition()) {
       Record = PrevDecl;
+      Template = PrevTemplate;
       return;
     }
 
@@ -84,8 +94,7 @@ struct BuiltinTypeDeclBuilder {
   BuiltinTypeDeclBuilder &
   addMemberVariable(StringRef Name, QualType Type, llvm::ArrayRef<Attr *> Attrs,
                     AccessSpecifier Access = AccessSpecifier::AS_private) {
-    if (Record->isCompleteDefinition())
-      return *this;
+    assert(!Record->isCompleteDefinition() && "record is already complete");
     assert(Record->isBeingDefined() &&
            "Definition must be started before adding members!");
     ASTContext &AST = Record->getASTContext();
@@ -109,22 +118,16 @@ struct BuiltinTypeDeclBuilder {
   }
 
   BuiltinTypeDeclBuilder &
-  addHandleMember(Sema &S, ResourceClass RC, ResourceKind RK, bool IsROV,
-                  bool RawBuffer,
+  addHandleMember(ResourceClass RC, ResourceKind RK, bool IsROV, bool RawBuffer,
                   AccessSpecifier Access = AccessSpecifier::AS_private) {
-    if (Record->isCompleteDefinition())
-      return *this;
+    assert(!Record->isCompleteDefinition() && "record is already complete");
 
-    ASTContext &Ctx = S.getASTContext();
+    ASTContext &Ctx = SemaRef.getASTContext();
     TypeSourceInfo *ElementTypeInfo = nullptr;
 
     QualType ElemTy = Ctx.Char8Ty;
-    if (Template) {
-      if (const auto *TTD = dyn_cast<TemplateTypeParmDecl>(
-              Template->getTemplateParameters()->getParam(0))) {
-        ElemTy = QualType(TTD->getTypeForDecl(), 0);
-      }
-    }
+    if (Template)
+      ElemTy = getFirstTemplateTypeParam();
     ElementTypeInfo = Ctx.getTrivialTypeSourceInfo(ElemTy, SourceLocation());
 
     // add handle member with resource type attributes
@@ -137,32 +140,13 @@ struct BuiltinTypeDeclBuilder {
             ? HLSLContainedTypeAttr::CreateImplicit(Ctx, ElementTypeInfo)
             : nullptr};
     Attr *ResourceAttr = HLSLResourceAttr::CreateImplicit(Ctx, RK);
-    if (CreateHLSLAttributedResourceType(S, Ctx.HLSLResourceTy, Attrs,
+    if (CreateHLSLAttributedResourceType(SemaRef, Ctx.HLSLResourceTy, Attrs,
                                          AttributedResTy))
       addMemberVariable("__handle", AttributedResTy, {ResourceAttr}, Access);
     return *this;
   }
 
-  static DeclRefExpr *lookupBuiltinFunction(ASTContext &AST, Sema &S,
-                                            StringRef Name) {
-    IdentifierInfo &II = AST.Idents.get(Name, tok::TokenKind::identifier);
-    DeclarationNameInfo NameInfo =
-        DeclarationNameInfo(DeclarationName(&II), SourceLocation());
-    LookupResult R(S, NameInfo, Sema::LookupOrdinaryName);
-    // AllowBuiltinCreation is false but LookupDirect will create
-    // the builtin when searching the global scope anyways...
-    S.LookupName(R, S.getCurScope());
-    // FIXME: If the builtin function was user-declared in global scope,
-    // this assert *will* fail. Should this call LookupBuiltin instead?
-    assert(R.isSingleResult() &&
-           "Since this is a builtin it should always resolve!");
-    auto *VD = cast<ValueDecl>(R.getFoundDecl());
-    QualType Ty = VD->getType();
-    return DeclRefExpr::Create(AST, NestedNameSpecifierLoc(), SourceLocation(),
-                               VD, false, NameInfo, Ty, VK_PRValue);
-  }
-
-  BuiltinTypeDeclBuilder &addDefaultHandleConstructor(Sema &S) {
+  BuiltinTypeDeclBuilder &addDefaultHandleConstructor() {
     if (Record->isCompleteDefinition())
       return *this;
     ASTContext &AST = Record->getASTContext();
@@ -187,25 +171,18 @@ struct BuiltinTypeDeclBuilder {
   }
 
   BuiltinTypeDeclBuilder &addArraySubscriptOperators() {
-    if (Record->isCompleteDefinition())
-      return *this;
     addArraySubscriptOperator(true);
     addArraySubscriptOperator(false);
     return *this;
   }
 
   BuiltinTypeDeclBuilder &addArraySubscriptOperator(bool IsConst) {
-    if (Record->isCompleteDefinition())
-      return *this;
+    assert(!Record->isCompleteDefinition() && "record is already complete");
 
     ASTContext &AST = Record->getASTContext();
     QualType ElemTy = AST.Char8Ty;
-    if (Template) {
-      if (const auto *TTD = dyn_cast<TemplateTypeParmDecl>(
-              Template->getTemplateParameters()->getParam(0))) {
-        ElemTy = QualType(TTD->getTypeForDecl(), 0);
-      }
-    }
+    if (Template)
+      ElemTy = getFirstTemplateTypeParam();
     QualType ReturnTy = ElemTy;
 
     FunctionProtoType::ExtProtoInfo ExtInfo;
@@ -271,16 +248,31 @@ struct BuiltinTypeDeclBuilder {
     return *this;
   }
 
+  FieldDecl *getResourceHandleField() {
+    auto I = Fields.find("__handle");
+    assert(I != Fields.end() &&
+           I->second->getType()->isHLSLAttributedResourceType() &&
+           "record does not have resource handle field");
+    return I->second;
+  }
+
+  QualType getFirstTemplateTypeParam() {
+    assert(Template && "record it not a template");
+    if (const auto *TTD = dyn_cast<TemplateTypeParmDecl>(
+            Template->getTemplateParameters()->getParam(0))) {
+      return QualType(TTD->getTypeForDecl(), 0);
+    }
+    return QualType();
+  }
+
   BuiltinTypeDeclBuilder &startDefinition() {
-    if (Record->isCompleteDefinition())
-      return *this;
+    assert(!Record->isCompleteDefinition() && "record is already complete");
     Record->startDefinition();
     return *this;
   }
 
   BuiltinTypeDeclBuilder &completeDefinition() {
-    if (Record->isCompleteDefinition())
-      return *this;
+    assert(!Record->isCompleteDefinition() && "record is already complete");
     assert(Record->isBeingDefined() &&
            "Definition must be started before completing it.");
 
@@ -288,38 +280,47 @@ struct BuiltinTypeDeclBuilder {
     return *this;
   }
 
-  TemplateParameterListBuilder addTemplateArgumentList(Sema &S);
-  BuiltinTypeDeclBuilder &
-  addSimpleTemplateParams(Sema &S, ArrayRef<StringRef> Names, ConceptDecl *CD);
-  BuiltinTypeDeclBuilder &addConceptSpecializationExpr(Sema &S);
+  Expr *getConstantIntExpr(int value) {
+    ASTContext &AST = SemaRef.getASTContext();
+    return IntegerLiteral::Create(
+        AST, llvm::APInt(AST.getTypeSize(AST.IntTy), value, true), AST.IntTy,
+        SourceLocation());
+  }
+
+  TemplateParameterListBuilder addTemplateArgumentList();
+  BuiltinTypeDeclBuilder &addSimpleTemplateParams(ArrayRef<StringRef> Names,
+                                                  ConceptDecl *CD);
+
+  // Builtin types methods
+  BuiltinTypeDeclBuilder &addIncrementCounterMethod();
+  BuiltinTypeDeclBuilder &addDecrementCounterMethod();
 };
 
 struct TemplateParameterListBuilder {
   BuiltinTypeDeclBuilder &Builder;
-  Sema &S;
   llvm::SmallVector<NamedDecl *> Params;
 
-  TemplateParameterListBuilder(Sema &S, BuiltinTypeDeclBuilder &RB)
-      : Builder(RB), S(S) {}
+  TemplateParameterListBuilder(BuiltinTypeDeclBuilder &RB) : Builder(RB) {}
 
   ~TemplateParameterListBuilder() { finalizeTemplateArgs(); }
 
   TemplateParameterListBuilder &
   addTypeParameter(StringRef Name, QualType DefaultValue = QualType()) {
-    if (Builder.Record->isCompleteDefinition())
-      return *this;
+    assert(!Builder.Record->isCompleteDefinition() &&
+           "record is already complete");
+    ASTContext &AST = Builder.SemaRef.getASTContext();
     unsigned Position = static_cast<unsigned>(Params.size());
     auto *Decl = TemplateTypeParmDecl::Create(
-        S.Context, Builder.Record->getDeclContext(), SourceLocation(),
+        AST, Builder.Record->getDeclContext(), SourceLocation(),
         SourceLocation(), /* TemplateDepth */ 0, Position,
-        &S.Context.Idents.get(Name, tok::TokenKind::identifier),
+        &AST.Idents.get(Name, tok::TokenKind::identifier),
         /* Typename */ true,
         /* ParameterPack */ false,
         /* HasTypeConstraint*/ false);
     if (!DefaultValue.isNull())
-      Decl->setDefaultArgument(
-          S.Context, S.getTrivialTemplateArgumentLoc(DefaultValue, QualType(),
-                                                     SourceLocation()));
+      Decl->setDefaultArgument(AST,
+                               Builder.SemaRef.getTrivialTemplateArgumentLoc(
+                                   DefaultValue, QualType(), SourceLocation()));
 
     Params.emplace_back(Decl);
     return *this;
@@ -421,14 +422,14 @@ struct TemplateParameterListBuilder {
   BuiltinTypeDeclBuilder &finalizeTemplateArgs(ConceptDecl *CD = nullptr) {
     if (Params.empty())
       return Builder;
-    ConceptSpecializationExpr *CSE =
-        CD ? constructConceptSpecializationExpr(S, CD) : nullptr;
 
-    auto *ParamList = TemplateParameterList::Create(S.Context, SourceLocation(),
-                                                    SourceLocation(), Params,
-                                                    SourceLocation(), CSE);
+    ASTContext &AST = Builder.SemaRef.Context;
+    ConceptSpecializationExpr *CSE =
+        CD ? constructConceptSpecializationExpr(Builder.SemaRef, CD) : nullptr;
+    auto *ParamList = TemplateParameterList::Create(
+        AST, SourceLocation(), SourceLocation(), Params, SourceLocation(), CSE);
     Builder.Template = ClassTemplateDecl::Create(
-        S.Context, Builder.Record->getDeclContext(), SourceLocation(),
+        AST, Builder.Record->getDeclContext(), SourceLocation(),
         DeclarationName(Builder.Record->getIdentifier()), ParamList,
         Builder.Record);
 
@@ -443,26 +444,233 @@ struct TemplateParameterListBuilder {
     Params.clear();
 
     QualType T = Builder.Template->getInjectedClassNameSpecialization();
-    T = S.Context.getInjectedClassNameType(Builder.Record, T);
+    T = AST.getInjectedClassNameType(Builder.Record, T);
 
     return Builder;
   }
 };
+
+// Builder for methods of builtin types. Allows adding methods to builtin types
+// using the builder pattern like this:
+//
+//   BuiltinTypeMethodBuilder(Sema, RecordBuilder, "MethodName", ReturnType)
+//       .addParam("param_name", Type, InOutModifier)
+//       .callBuiltin("builtin_name", { BuiltinParams })
+//       .finalizeMethod();
+//
+// The builder needs to have all of the method parameters before it can create
+// a CXXMethodDecl. It collects them in addParam calls and when a first
+// method that builds the body is called or when access to 'this' is needed it
+// creates the CXXMethodDecl and ParmVarDecls instances. These can then be
+// referenced from the body building methods. Destructor or an explicit call to
+// finalizeMethod() will complete the method definition.
+//
+// The callBuiltin helper method passes in the resource handle as the first
+// argument of the builtin call. If this is not desired it takes a bool flag to
+// disable this.
+//
+// If the method that is being built has a non-void return type the
+// finalizeMethod will create a return statement with the value of the last
+// statement (unless the last statement is already a ReturnStmt).
+struct BuiltinTypeMethodBuilder {
+  struct MethodParam {
+    const IdentifierInfo &NameII;
+    QualType Ty;
+    HLSLParamModifierAttr::Spelling Modifier;
+    MethodParam(const IdentifierInfo &NameII, QualType Ty,
+                HLSLParamModifierAttr::Spelling Modifier)
+        : NameII(NameII), Ty(Ty), Modifier(Modifier) {}
+  };
+
+  BuiltinTypeDeclBuilder &DeclBuilder;
+  DeclarationNameInfo NameInfo;
+  QualType ReturnTy;
+  CXXMethodDecl *Method;
+  llvm::SmallVector<MethodParam> Params;
+  llvm::SmallVector<Stmt *> StmtsList;
+
+public:
+  BuiltinTypeMethodBuilder(Sema &S, BuiltinTypeDeclBuilder &DB, StringRef Name,
+                           QualType ReturnTy)
+      : DeclBuilder(DB), ReturnTy(ReturnTy), Method(nullptr) {
+    const IdentifierInfo &II =
+        S.getASTContext().Idents.get(Name, tok::TokenKind::identifier);
+    NameInfo = DeclarationNameInfo(DeclarationName(&II), SourceLocation());
+  }
+
+  BuiltinTypeMethodBuilder &addParam(StringRef Name, QualType Ty,
+                                     HLSLParamModifierAttr::Spelling Modifier =
+                                         HLSLParamModifierAttr::Keyword_in) {
+    assert(Method == nullptr && "Cannot add param, method already created");
+    llvm_unreachable("not yet implemented");
+  }
+
+private:
+  void createMethodDecl() {
+    assert(Method == nullptr && "Method already created");
+
+    // create method type
+    ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+    SmallVector<QualType> ParamTypes;
+    for (MethodParam &MP : Params)
+      ParamTypes.emplace_back(MP.Ty);
+    QualType MethodTy = AST.getFunctionType(ReturnTy, ParamTypes,
+                                            FunctionProtoType::ExtProtoInfo());
+
+    // create method decl
+    auto *TSInfo = AST.getTrivialTypeSourceInfo(MethodTy, SourceLocation());
+    Method =
+        CXXMethodDecl::Create(AST, DeclBuilder.Record, SourceLocation(),
+                              NameInfo, MethodTy, TSInfo, SC_None, false, false,
+                              ConstexprSpecKind::Unspecified, SourceLocation());
+
+    // create params & set them to the function prototype
+    SmallVector<ParmVarDecl *> ParmDecls;
+    auto FnProtoLoc =
+        Method->getTypeSourceInfo()->getTypeLoc().getAs<FunctionProtoTypeLoc>();
+    for (int I = 0, E = Params.size(); I != E; I++) {
+      MethodParam &MP = Params[I];
+      ParmVarDecl *Parm = ParmVarDecl::Create(
+          AST, Method->getDeclContext(), SourceLocation(), SourceLocation(),
+          &MP.NameII, MP.Ty,
+          AST.getTrivialTypeSourceInfo(MP.Ty, SourceLocation()), SC_None,
+          nullptr);
+      if (MP.Modifier != HLSLParamModifierAttr::Keyword_in) {
+        auto *Mod =
+            HLSLParamModifierAttr::Create(AST, SourceRange(), MP.Modifier);
+        Parm->addAttr(Mod);
+      }
+      ParmDecls.push_back(Parm);
+      FnProtoLoc.setParam(I, Parm);
+    }
+    Method->setParams({ParmDecls});
+  }
+
+public:
+  ~BuiltinTypeMethodBuilder() { finalizeMethod(); }
+
+  Expr *getResourceHandleExpr() {
+    // The first statement added to a method or access to 'this' creates the
+    // declaration.
+    if (!Method)
+      createMethodDecl();
+
+    ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+    CXXThisExpr *This = CXXThisExpr::Create(
+        AST, SourceLocation(), Method->getFunctionObjectParameterType(), true);
+    FieldDecl *HandleField = DeclBuilder.getResourceHandleField();
+    return MemberExpr::CreateImplicit(AST, This, false, HandleField,
+                                      HandleField->getType(), VK_LValue,
+                                      OK_Ordinary);
+  }
+
+  BuiltinTypeMethodBuilder &
+  callBuiltin(StringRef BuiltinName, ArrayRef<Expr *> CallParms,
+              bool AddResourceHandleAsFirstArg = true) {
+
    // The first statement added to a method or access to 'this' creates the
+    // declaration.
+    if (!Method)
+      createMethodDecl();
+
+    ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+    FunctionDecl *FD = lookupBuiltinFunction(DeclBuilder.SemaRef, BuiltinName);
+    DeclRefExpr *DRE = DeclRefExpr::Create(
+        AST, NestedNameSpecifierLoc(), SourceLocation(), FD, false,
+        FD->getNameInfo(), FD->getType(), VK_PRValue);
+
+    SmallVector<Expr *> NewCallParms;
+    if (AddResourceHandleAsFirstArg) {
+      NewCallParms.push_back(getResourceHandleExpr());
+      for (auto *P : CallParms)
+        NewCallParms.push_back(P);
+    }
+
+    Expr *Call = CallExpr::Create(
+        AST, DRE, AddResourceHandleAsFirstArg ? NewCallParms : CallParms,
+        FD->getReturnType(), VK_PRValue, SourceLocation(), FPOptionsOverride());
+    StmtsList.push_back(Call);
+    return *this;
+  }
+
+  BuiltinTypeDeclBuilder &finalizeMethod() {
+    assert(!DeclBuilder.Record->isCompleteDefinition() &&
+           "record is already complete");
+    assert(
+        Method != nullptr &&
+        "method decl not created; are you missing a call to build the body?");
+
+    if (!Method->hasBody()) {
+      ASTContext &AST = DeclBuilder.SemaRef.getASTContext();
+      assert((ReturnTy == AST.VoidTy || !StmtsList.empty()) &&
+             "nothing to return from non-void method");
+      if (ReturnTy != AST.VoidTy) {
+        if (Expr *LastExpr = dyn_cast<Expr>(StmtsList.back())) {
+          assert(AST.hasSameUnqualifiedType(
+                     isa<CallExpr>(LastExpr)
+                         ? cast<CallExpr>(LastExpr)->getCallReturnType(AST)
+                         : LastExpr->getType(),
+                     ReturnTy) &&
+                 "Return type of the last statement must match the return type "
+                 "of the method");
+          if (!isa<ReturnStmt>(LastExpr)) {
+            StmtsList.pop_back();
+            StmtsList.push_back(
+                ReturnStmt::Create(AST, SourceLocation(), LastExpr, nullptr));
+          }
+        }
+      }
+
+      Method->setBody(CompoundStmt::Create(AST, StmtsList, FPOptionsOverride(),
+                                           SourceLocation(), SourceLocation()));
+      Method->setLexicalDeclContext(DeclBuilder.Record);
+      Method->setAccess(AccessSpecifier::AS_public);
+      Method->addAttr(AlwaysInlineAttr::CreateImplicit(
+          AST, SourceRange(), AlwaysInlineAttr::CXX11_clang_always_inline));
+      DeclBuilder.Record->addDecl(Method);
+    }
+    return DeclBuilder;
+  }
+};
+
 } // namespace
 
-TemplateParameterListBuilder
-BuiltinTypeDeclBuilder::addTemplateArgumentList(Sema &S) {
-  return TemplateParameterListBuilder(S, *this);
+TemplateParameterListBuilder BuiltinTypeDeclBuilder::addTemplateArgumentList() {
+  return TemplateParameterListBuilder(*this);
 }
 
-BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addSimpleTemplateParams(
-    Sema &S, ArrayRef<StringRef> Names, ConceptDecl *CD = nullptr) {
-  TemplateParameterListBuilder Builder = this->addTemplateArgumentList(S);
+BuiltinTypeDeclBuilder &
+BuiltinTypeDeclBuilder::addSimpleTemplateParams(ArrayRef<StringRef> Names,
+                                                ConceptDecl *CD = nullptr) {
+  if (Record->isCompleteDefinition()) {
+    assert(Template && "existing record it not a template");
+    assert(Template->getTemplateParameters()->size() == Names.size() &&
+           "template param count mismatch");
+    return *this;
+  }
+
+  TemplateParameterListBuilder Builder = this->addTemplateArgumentList();
   for (StringRef Name : Names)
     Builder.addTypeParameter(Name);
   return Builder.finalizeTemplateArgs(CD);
 }
 
+BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addIncrementCounterMethod() {
+  return BuiltinTypeMethodBuilder(SemaRef, *this, "IncrementCounter",
+                                  SemaRef.getASTContext().UnsignedIntTy)
+      .callBuiltin("__builtin_hlsl_buffer_update_counter",
+                   {getConstantIntExpr(1)})
+      .finalizeMethod();
+}
+
+BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addDecrementCounterMethod() {
+  return BuiltinTypeMethodBuilder(SemaRef, *this, "DecrementCounter",
+                                  SemaRef.getASTContext().UnsignedIntTy)
+      .callBuiltin("__builtin_hlsl_buffer_update_counter",
+                   {getConstantIntExpr(-1)})
+      .finalizeMethod();
+}
+
 HLSLExternalSemaSource::~HLSLExternalSemaSource() {}
 
 void HLSLExternalSemaSource::InitializeSema(Sema &S) {
@@ -566,9 +774,9 @@ void HLSLExternalSemaSource::defineTrivialHLSLTypes() {
 static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S,
                                               ResourceClass RC, ResourceKind RK,
                                               bool IsROV, bool RawBuffer) {
-  return BuiltinTypeDeclBuilder(Decl)
-      .addHandleMember(S, RC, RK, IsROV, RawBuffer)
-      .addDefaultHandleConstructor(S);
+  return BuiltinTypeDeclBuilder(S, Decl)
+      .addHandleMember(RC, RK, IsROV, RawBuffer)
+      .addDefaultHandleConstructor();
 }
 
 Expr *constructTypedBufferConstraintExpr(Sema &S, SourceLocation NameLoc,
@@ -636,8 +844,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
   ConceptDecl *TypedBufferConcept =
       constructTypedBufferConceptDecl(*SemaPtr, HLSLNamespace);
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWBuffer")
-             .addSimpleTemplateParams(*SemaPtr, {"element_type"},
-                                      TypedBufferConcept)
+             .addSimpleTemplateParams({"element_type"}, TypedBufferConcept)
              .Record;
 
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
@@ -650,7 +857,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
 
   Decl =
       BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RasterizerOrderedBuffer")
-          .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+          .addSimpleTemplateParams({"element_type"})
           .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::UAV,
@@ -661,7 +868,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
   });
 
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "StructuredBuffer")
-             .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+             .addSimpleTemplateParams({"element_type"})
              .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::SRV, ResourceKind::RawBuffer,
@@ -671,18 +878,20 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
   });
 
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "RWStructuredBuffer")
-             .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+             .addSimpleTemplateParams({"element_type"})
              .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
                     /*IsROV=*/false, /*RawBuffer=*/true)
         .addArraySubscriptOperators()
+        .addIncrementCounterMethod()
+        .addDecrementCounterMethod()
         .completeDefinition();
   });
 
   Decl =
       BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "AppendStructuredBuffer")
-          .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+          .addSimpleTemplateParams({"element_type"})
           .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
@@ -692,7 +901,7 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
 
   Decl =
       BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace, "ConsumeStructuredBuffer")
-          .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+          .addSimpleTemplateParams({"element_type"})
           .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
@@ -702,19 +911,22 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() {
 
   Decl = BuiltinTypeDeclBuilder(*SemaPtr, HLSLNamespace,
                                 "RasterizerOrderedStructuredBuffer")
-             .addSimpleTemplateParams(*SemaPtr, {"element_type"})
+             .addSimpleTemplateParams({"element_type"})
              .Record;
   onCompletion(Decl, [this](CXXRecordDecl *Decl) {
     setupBufferType(Decl, *SemaPtr, ResourceClass::UAV, ResourceKind::RawBuffer,
                     /*IsROV=*/true, /*RawBuffer=*/true)
         .addArraySubscriptOperators()
+        .addIncrementCounterMethod()
+        .addDecrementCounterMethod()
         .completeDefinition();
   });
 }
 
 void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record,
                                           CompletionFunction Fn) {
-  Completions.insert(std::make_pair(Record->getCanonicalDecl(), Fn));
+  if (!Record->isCompleteDefinition())
+    Completions.insert(std::make_pair(Record->getCanonicalDecl(), Fn));
 }
 
 void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) {
@@ -732,3 +944,19 @@ void HLSLExternalSemaSource::CompleteType(TagDecl *Tag) {
     return;
   It->second(Record);
 }
+
+static FunctionDecl *lookupBuiltinFunction(Sema &S, StringRef Name) {
+  IdentifierInfo &II =
+      S.getASTContext().Idents.get(Name, tok::TokenKind::identifier);
+  DeclarationNameInfo NameInfo =
+      DeclarationNameInfo(DeclarationName(&II), SourceLocation());
+  LookupResult R(S, NameInfo, Sema::LookupOrdinaryName);
+  // AllowBuiltinCreation is false but LookupDirect will create
+  // the builtin when searching the global scope anyways...
+  S.LookupName(R, S.getCurScope());
+  // FIXME: If the builtin function was user-declared in global scope,
+  // this assert *will* fail. Should this call LookupBuiltin instead?
+  assert(R.isSingleResult() &&
+         "Since this is a builtin it should always resolve!");
+  return cast<FunctionDecl>(R.getFoundDecl());
+}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 6c7472ce92703b..c9d7444d5865a5 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -983,6 +983,9 @@ Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) {
   if (getLangOpts().MSVCCompat)
     return VAK_MSVCUndefined;
 
+  if (getLangOpts().HLSL && Ty->getAs<HLSLAttributedResourceType>())
+    return VAK_Valid;
+
   // FIXME: In C++11, these cases are conditionally-supported, meaning we're
   // permitted to reject them. We should consider doing so.
   return VAK_Undefined;
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 2bc93e4ec1181f..289d9dc0f11306 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1696,6 +1696,16 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
   return true;
 }
 
+bool CheckArgTypeIsCorrect(Sema *S, Expr *Arg, QualType ExpectedType) {
+  QualType ArgType = Arg->getType();
+  if (!S->getASTContext().hasSameUnqualifiedType(ArgType, ExpectedType)) {
+    S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+        << ArgType << ExpectedType << 1 << 0 << 0;
+    return true;
+  }
+  return false;
+}
+
 bool CheckArgTypeIsCorrect(
     Sema *S, Expr *Arg, QualType ExpectedType,
     llvm::function_ref<bool(clang::QualType PassedType)> Check) {
@@ -1878,6 +1888,29 @@ static bool CheckVectorSelect(Sema *S, CallExpr *TheCall) {
   return false;
 }
 
+static bool CheckResourceHandle(
+    Sema *S, CallExpr *TheCall, unsigned ArgIndex,
+    llvm::function_ref<bool(const HLSLAttributedResourceType *ResType)> Check =
+        nullptr) {
+  assert(TheCall->getNumArgs() >= ArgIndex);
+  QualType ArgType = TheCall->getArg(ArgIndex)->getType();
+  const HLSLAttributedResourceType *ResTy =
+      ArgType.getTypePtr()->getAs<HLSLAttributedResourceType>();
+  if (!ResTy) {
+    S->Diag(TheCall->getArg(0)->getBeginLoc(),
+            diag::err_typecheck_expect_hlsl_resource)
+        << ArgType;
+    return true;
+  }
+  if (Check && Check(ResTy)) {
+    S->Diag(TheCall->getArg(ArgIndex)->getExprLoc(),
+            diag::err_invalid_hlsl_resource_type)
+        << ArgType;
+    return true;
+  }
+  return false;
+}
+
 // Note: returning true in this case results in CheckBuiltinFunctionCall
 // returning an ExprError
 bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -2176,6 +2209,27 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_buffer_update_counter: {
+    auto checkResTy = [](const HLSLAttributedResourceType *ResTy) -> bool {
+      return !(ResTy->getAttrs().ResourceClass == ResourceClass::UAV &&
+               ResTy->getAttrs().RawBuffer && ResTy->hasContainedType());
+    };
+    if (SemaRef.checkArgCount(TheCall, 2) ||
+        CheckResourceHandle(&SemaRef, TheCall, 0, checkResTy) ||
+        CheckArgTypeIsCorrect(&SemaRef, TheCall->getArg(1),
+                              SemaRef.getASTContext().IntTy))
+      return true;
+    Expr *OffsetExpr = TheCall->getArg(1);
+    std::optional<llvm::APSInt> Offset =
+        OffsetExpr->getIntegerConstantExpr(SemaRef.getASTContext());
+    if (!Offset.has_value() || abs(Offset->getExtValue()) != 1) {
+      SemaRef.Diag(TheCall->getArg(1)->getBeginLoc(),
+                   diag::err_hlsl_expect_arg_const_int_one_or_neg_one)
+          << 1;
+      return true;
+    }
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl b/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl
index bef054a62e794d..a1af001e2cad62 100644
--- a/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl
+++ b/clang/test/AST/HLSL/RWStructuredBuffer-AST.hlsl
@@ -52,6 +52,32 @@ RWStructuredBuffer<int> Buffer;
 // CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWStructuredBuffer<element_type>' lvalue implicit this
 // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
 
+// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> IncrementCounter 'unsigned int ()'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: CallExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int'
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int (...) noexcept' Function 0x{{[0-9A-Fa-f]+}} '__builtin_hlsl_buffer_update_counter' 'unsigned int (...) noexcept'
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWStructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'int' 1
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
+// CHECK-NEXT: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> DecrementCounter 'unsigned int ()'
+// CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <<invalid sloc>>
+// CHECK-NEXT: CallExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int'
+// CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'unsigned int (...) noexcept' Function 0x{{[0-9A-Fa-f]+}} '__builtin_hlsl_buffer_update_counter' 'unsigned int (...) noexcept'
+// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> '__hlsl_resource_t
+// CHECK-SAME{LITERAL}: [[hlsl::resource_class(UAV)]]
+// CHECK-SAME{LITERAL}: [[hlsl::raw_buffer]]
+// CHECK-SAME{LITERAL}: [[hlsl::contained_type(element_type)]]' lvalue .__handle
+// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'RWStructuredBuffer<element_type>' lvalue implicit this
+// CHECK-NEXT: IntegerLiteral 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> 'int' -1
+// CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> Implicit always_inline
+
 // CHECK: ClassTemplateSpecializationDecl 0x{{[0-9A-Fa-f]+}} <<invalid sloc>> <invalid sloc> class RWStructuredBuffer definition
 
 // CHECK: TemplateArgument type 'int'
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
new file mode 100644
index 00000000000000..128fff9b90a223
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-lib.hlsl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+
+// NOTE: SPIRV codegen for resource methods is not yet implemented
+
+RWStructuredBuffer<float> RWSB1 : register(u0);
+RWStructuredBuffer<float> RWSB2 : register(u1);
+
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), float }
+
+export void TestIncrementCounter() {
+    RWSB1.IncrementCounter();
+}
+
+// CHECK: define void @_Z20TestIncrementCounterv()
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1)
+
+export void TestDecrementCounter() {
+    RWSB2.DecrementCounter();
+}
+
+// CHECK: define void @_Z20TestDecrementCounterv()
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1)
+
+// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8)
diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
new file mode 100644
index 00000000000000..e895d30b54007a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-methods-ps.hlsl
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL
+// RUN-DISABLED: %clang_cc1 -triple spirv-vulkan-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV
+
+// NOTE: SPIRV codegen for resource methods is not yet implemented
+
+RWStructuredBuffer<float> RWSB1, RWSB2;
+RasterizerOrderedStructuredBuffer<float> ROSB1, ROSB2;
+
+// CHECK: %"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", float, 1, 0), float }
+
+export void TestIncrementCounter() {
+// CHECK: define void @_Z20TestIncrementCounterv()
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 1)
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 1)
+    RWSB1.IncrementCounter();
+    ROSB1.IncrementCounter();
+}
+
+export void TestDecrementCounter() {
+// CHECK: define void @_Z20TestDecrementCounterv()
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0) %{{[0-9]+}}, i8 -1)
+// CHECK-DXIL: call i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1) %{{[0-9]+}}, i8 -1)
+    RWSB2.DecrementCounter();
+    ROSB2.DecrementCounter();
+}
+
+// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_0t(target("dx.RawBuffer", float, 1, 0), i8)
+// CHECK: declare i32 @llvm.dx.bufferUpdateCounter.tdx.RawBuffer_f32_1_1t(target("dx.RawBuffer", float, 1, 1), i8)
diff --git a/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl
new file mode 100644
index 00000000000000..4aa3ac183d3b15
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/buffer_update_counter-errors.hlsl
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+// RWStructuredBuffer<int>
+using handle_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]] [[hlsl::raw_buffer]];
+// RWBuffer<int>
+using bad_handle_not_raw_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::contained_type(int)]];
+// RWByteAddressBuffer
+using bad_handle_no_type_t = __hlsl_resource_t [[hlsl::resource_class(UAV)]] [[hlsl::raw_buffer]];
+// StructuredBuffer
+using bad_handle_not_uav_t = __hlsl_resource_t [[hlsl::resource_class(SRV)]] [[hlsl::contained_type(int)]] [[hlsl::raw_buffer]];
+
+void test_args(int x, bool b) {
+  // expected-error at +1 {{too few arguments to function call, expected 2, have 1}}
+  __builtin_hlsl_buffer_update_counter(x);
+
+  // expected-error at +1 {{too many arguments to function call, expected 2, have 3}}
+  __builtin_hlsl_buffer_update_counter(x, x, x);
+
+  // expected-error at +1 {{used type 'int' where __hlsl_resource_t is required}}
+  __builtin_hlsl_buffer_update_counter(x, x);
+
+  bad_handle_not_raw_t bad1;
+  bad_handle_no_type_t bad2;
+  bad_handle_not_uav_t bad3;
+
+  // expected-error at +1 {{invalid __hlsl_resource_t type attributes}}
+  __builtin_hlsl_buffer_update_counter(bad1, 1);
+
+  // expected-error at +1 {{invalid __hlsl_resource_t type attributes}}
+  __builtin_hlsl_buffer_update_counter(bad2, 1);
+
+  // expected-error at +1 {{invalid __hlsl_resource_t type attributes}}
+  __builtin_hlsl_buffer_update_counter(bad3, 1);
+
+  handle_t res;
+
+  // expected-error at +1 {{argument 1 must be constant integer 1 or -1}}
+  __builtin_hlsl_buffer_update_counter(res, x);
+
+  // expected-error at +1 {{passing 'const char *' to parameter of incompatible type 'int'}}
+  __builtin_hlsl_buffer_update_counter(res, "1");
+  
+  // expected-error at +1 {{argument 1 must be constant integer 1 or -1}}
+  __builtin_hlsl_buffer_update_counter(res, 10);
+
+  // no error
+  __builtin_hlsl_buffer_update_counter(res, 1);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index dad60a2535cf4d..bf49ec6f6c6496 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -37,7 +37,7 @@ def int_dx_typedBufferStore
     : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty],
                             [IntrWriteMem]>;
 
-def int_dx_updateCounter
+def int_dx_bufferUpdateCounter
     : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
                             [IntrInaccessibleMemOrArgMemOnly]>;
     
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index e1157085832866..17b70062e58fa9 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -106,9 +106,14 @@ let TargetPrefix = "spv" in {
             [llvm_any_ty],
             [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
             [IntrNoMem]>;
+
   def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
   def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
 
+  def int_spv_bufferUpdateCounter
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
+                              [IntrInaccessibleMemOrArgMemOnly]>;
+
   // Read a value from the image buffer. It does not translate directly to a
   // single OpImageRead because the result type is not necessarily a 4 element
   // vector.
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 9f124394363a38..7f50fc509a862a 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -647,7 +647,7 @@ class OpLowerer {
       case Intrinsic::dx_typedBufferStore:
         HasErrors |= lowerTypedBufferStore(F);
         break;
-      case Intrinsic::dx_updateCounter:
+      case Intrinsic::dx_bufferUpdateCounter:
         HasErrors |= lowerUpdateCounter(F);
         break;
       // TODO: this can be removed when

>From 8d184778ed78462a59f1c873e6193f51a6092a8b Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sat, 23 Nov 2024 20:43:05 +0300
Subject: [PATCH 08/12] [UTC] Add support for Xtensa (#117441)

Regenerate the failing test as well.
---
 llvm/test/CodeGen/Xtensa/mul.ll    | 500 ++++++++++++++++++++++++-----
 llvm/utils/UpdateTestChecks/asm.py |  17 +
 2 files changed, 441 insertions(+), 76 deletions(-)

diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll
index 9b13897293dc1b..c5995bbc479a6a 100644
--- a/llvm/test/CodeGen/Xtensa/mul.ll
+++ b/llvm/test/CodeGen/Xtensa/mul.ll
@@ -4,7 +4,8 @@
 
 define signext i32 @square(i32 %a) nounwind {
 ; XTENSA-LABEL: square:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a8, .LCPI0_0
@@ -20,7 +21,8 @@ define signext i32 @square(i32 %a) nounwind {
 
 define signext i32 @mul(i32 %a, i32 %b) nounwind {
 ; XTENSA-LABEL: mul:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a8, .LCPI1_0
@@ -35,7 +37,8 @@ define signext i32 @mul(i32 %a, i32 %b) nounwind {
 
 define signext i32 @mul_constant(i32 %a) nounwind {
 ; XTENSA-LABEL: mul_constant:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, 5
@@ -51,7 +54,8 @@ define signext i32 @mul_constant(i32 %a) nounwind {
 
 define i32 @mul_pow2(i32 %a) nounwind {
 ; XTENSA-LABEL: mul_pow2:
-; XTENSA:         slli a2, a2, 3
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a2, a2, 3
 ; XTENSA-NEXT:    ret
   %1 = mul i32 %a, 8
   ret i32 %1
@@ -59,7 +63,8 @@ define i32 @mul_pow2(i32 %a) nounwind {
 
 define i64 @mul64(i64 %a, i64 %b) nounwind {
 ; XTENSA-LABEL: mul64:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a8, .LCPI4_0
@@ -74,7 +79,8 @@ define i64 @mul64(i64 %a, i64 %b) nounwind {
 
 define i64 @mul64_constant(i64 %a) nounwind {
 ; XTENSA-LABEL: mul64_constant:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, 5
@@ -91,7 +97,8 @@ define i64 @mul64_constant(i64 %a) nounwind {
 
 define i32 @mulhs(i32 %a, i32 %b) nounwind {
 ; XTENSA-LABEL: mulhs:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    or a4, a3, a3
@@ -114,7 +121,8 @@ define i32 @mulhs(i32 %a, i32 %b) nounwind {
 
 define i32 @mulhs_positive_constant(i32 %a) nounwind {
 ; XTENSA-LABEL: mulhs_positive_constant:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    srai a3, a2, 31
@@ -136,7 +144,8 @@ define i32 @mulhs_positive_constant(i32 %a) nounwind {
 
 define i32 @mulhs_negative_constant(i32 %a) nounwind {
 ; XTENSA-LABEL: mulhs_negative_constant:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    srai a3, a2, 31
@@ -158,7 +167,8 @@ define i32 @mulhs_negative_constant(i32 %a) nounwind {
 
 define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind {
 ; XTENSA-LABEL: mulhu:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    or a4, a3, a3
@@ -181,7 +191,8 @@ define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind {
 
 define i32 @mulhsu(i32 %a, i32 %b) nounwind {
 ; XTENSA-LABEL: mulhsu:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    or a4, a3, a3
@@ -204,7 +215,8 @@ define i32 @mulhsu(i32 %a, i32 %b) nounwind {
 
 define i32 @mulhu_constant(i32 %a) nounwind {
 ; XTENSA-LABEL: mulhu_constant:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, 5
@@ -226,7 +238,8 @@ define i32 @mulhu_constant(i32 %a) nounwind {
 
 define i32 @muli32_p65(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p65:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, 65
@@ -242,7 +255,8 @@ define i32 @muli32_p65(i32 %a) nounwind {
 
 define i32 @muli32_p63(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p63:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, 63
@@ -258,7 +272,8 @@ define i32 @muli32_p63(i32 %a) nounwind {
 
 define i64 @muli64_p65(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_p65:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, 65
@@ -275,7 +290,8 @@ define i64 @muli64_p65(i64 %a) nounwind {
 
 define i64 @muli64_p63(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_p63:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, 63
@@ -292,7 +308,8 @@ define i64 @muli64_p63(i64 %a) nounwind {
 
 define i32 @muli32_m63(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_m63:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, -63
@@ -308,7 +325,8 @@ define i32 @muli32_m63(i32 %a) nounwind {
 
 define i32 @muli32_m65(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_m65:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, -65
@@ -324,7 +342,8 @@ define i32 @muli32_m65(i32 %a) nounwind {
 
 define i64 @muli64_m63(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_m63:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, -63
@@ -341,7 +360,8 @@ define i64 @muli64_m63(i64 %a) nounwind {
 
 define i64 @muli64_m65(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_m65:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a4, -65
@@ -358,7 +378,8 @@ define i64 @muli64_m65(i64 %a) nounwind {
 
 define i32 @muli32_p384(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p384:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    movi a3, 384
@@ -374,7 +395,8 @@ define i32 @muli32_p384(i32 %a) nounwind {
 
 define i32 @muli32_p12288(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p12288:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a3, .LCPI21_0
@@ -390,7 +412,8 @@ define i32 @muli32_p12288(i32 %a) nounwind {
 
 define i32 @muli32_p4352(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p4352:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a3, .LCPI22_0
@@ -406,7 +429,8 @@ define i32 @muli32_p4352(i32 %a) nounwind {
 
 define i32 @muli32_p3840(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_p3840:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a3, .LCPI23_0
@@ -422,7 +446,8 @@ define i32 @muli32_p3840(i32 %a) nounwind {
 
 define i32 @muli32_m3840(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_m3840:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a3, .LCPI24_0
@@ -438,7 +463,8 @@ define i32 @muli32_m3840(i32 %a) nounwind {
 
 define i32 @muli32_m4352(i32 %a) nounwind {
 ; XTENSA-LABEL: muli32_m4352:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a3, .LCPI25_0
@@ -454,7 +480,8 @@ define i32 @muli32_m4352(i32 %a) nounwind {
 
 define i64 @muli64_p4352(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_p4352:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a4, .LCPI26_0
@@ -471,7 +498,8 @@ define i64 @muli64_p4352(i64 %a) nounwind {
 
 define i64 @muli64_p3840(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_p3840:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a4, .LCPI27_0
@@ -488,7 +516,8 @@ define i64 @muli64_p3840(i64 %a) nounwind {
 
 define i64 @muli64_m4352(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_m4352:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a4, .LCPI28_0
@@ -505,7 +534,8 @@ define i64 @muli64_m4352(i64 %a) nounwind {
 
 define i64 @muli64_m3840(i64 %a) nounwind {
 ; XTENSA-LABEL: muli64_m3840:
-; XTENSA:         addi a8, a1, -16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -16
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    s32i a0, a1, 0 # 4-byte Folded Spill
 ; XTENSA-NEXT:    l32r a4, .LCPI29_0
@@ -522,17 +552,123 @@ define i64 @muli64_m3840(i64 %a) nounwind {
 
 define i128 @muli128_m3840(i128 %a) nounwind {
 ; XTENSA-LABEL: muli128_m3840:
-; XTENSA:         addi a8, a1, -16
-; XTENSA-NEXT:    or a1, a8, a8
-; XTENSA-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
-; XTENSA-NEXT:    movi a7, -1
-; XTENSA-NEXT:    s32i a7, a1, 4
-; XTENSA-NEXT:    s32i a7, a1, 0
-; XTENSA-NEXT:    l32r a6, .LCPI30_0
-; XTENSA-NEXT:    l32r a8, .LCPI30_1
-; XTENSA-NEXT:    callx0 a8
-; XTENSA-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
-; XTENSA-NEXT:    addi a8, a1, 16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -80
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    s32i a0, a1, 64 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a12, a1, 60 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a13, a1, 56 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a14, a1, 52 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 48 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a5, a1, 20 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a4, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a15, a3, a3
+; XTENSA-NEXT:    l32r a14, .LCPI30_0
+; XTENSA-NEXT:    movi a12, 0
+; XTENSA-NEXT:    l32r a13, .LCPI30_1
+; XTENSA-NEXT:    s32i a2, a1, 36 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a2, a1, 28 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a3, a1, 44 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 40 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a2, a15, a15
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    s32i a14, a1, 12 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    l32i a8, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a15, a2, a8
+; XTENSA-NEXT:    movi a8, 1
+; XTENSA-NEXT:    s32i a8, a1, 44 # 4-byte Folded Spill
+; XTENSA-NEXT:    bltu a15, a2, .LBB30_2
+; XTENSA-NEXT:  # %bb.1:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB30_2:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    s32i a8, a1, 32 # 4-byte Folded Spill
+; XTENSA-NEXT:    movi a14, -1
+; XTENSA-NEXT:    l32i a2, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    add a9, a2, a15
+; XTENSA-NEXT:    l32i a8, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    s32i a9, a1, 24 # 4-byte Folded Spill
+; XTENSA-NEXT:    bltu a9, a2, .LBB30_4
+; XTENSA-NEXT:  # %bb.3:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB30_4:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    l32i a9, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a15, a9, a8
+; XTENSA-NEXT:    l32i a2, a1, 40 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a3, a1, 4 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a2, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT:    add a15, a2, a15
+; XTENSA-NEXT:    l32i a2, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 20 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a4, a1, 12 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a5, a14, a14
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a2, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a3, a1, 20 # 4-byte Folded Spill
+; XTENSA-NEXT:    l32i a2, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 40 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a14, a14
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    l32i a8, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a9, a2, a8
+; XTENSA-NEXT:    add a4, a15, a9
+; XTENSA-NEXT:    l32i a7, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a8, a7, a7
+; XTENSA-NEXT:    bltu a4, a15, .LBB30_6
+; XTENSA-NEXT:  # %bb.5:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB30_6:
+; XTENSA-NEXT:    or a10, a7, a7
+; XTENSA-NEXT:    l32i a11, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a15, a11, .LBB30_8
+; XTENSA-NEXT:  # %bb.7:
+; XTENSA-NEXT:    or a10, a12, a12
+; XTENSA-NEXT:  .LBB30_8:
+; XTENSA-NEXT:    or a11, a7, a7
+; XTENSA-NEXT:    l32i a6, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a5, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a5, a6, .LBB30_10
+; XTENSA-NEXT:  # %bb.9:
+; XTENSA-NEXT:    or a11, a12, a12
+; XTENSA-NEXT:  .LBB30_10:
+; XTENSA-NEXT:    l32i a6, a1, 4 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a11, a6, a11
+; XTENSA-NEXT:    add a10, a11, a10
+; XTENSA-NEXT:    bltu a9, a2, .LBB30_12
+; XTENSA-NEXT:  # %bb.11:
+; XTENSA-NEXT:    or a7, a12, a12
+; XTENSA-NEXT:  .LBB30_12:
+; XTENSA-NEXT:    l32i a9, a1, 20 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a9, a3, a9
+; XTENSA-NEXT:    add a9, a9, a7
+; XTENSA-NEXT:    add a9, a10, a9
+; XTENSA-NEXT:    add a5, a9, a8
+; XTENSA-NEXT:    l32i a2, a1, 28 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 24 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a15, a1, 48 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a14, a1, 52 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a13, a1, 56 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a12, a1, 60 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a0, a1, 64 # 4-byte Folded Reload
+; XTENSA-NEXT:    addi a8, a1, 80
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    ret
   %1 = mul i128 %a, -3840
@@ -541,17 +677,123 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 
 define i128 @muli128_m63(i128 %a) nounwind {
 ; XTENSA-LABEL: muli128_m63:
-; XTENSA:         addi a8, a1, -16
-; XTENSA-NEXT:    or a1, a8, a8
-; XTENSA-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
-; XTENSA-NEXT:    movi a7, -1
-; XTENSA-NEXT:    s32i a7, a1, 4
-; XTENSA-NEXT:    s32i a7, a1, 0
-; XTENSA-NEXT:    movi a6, -63
-; XTENSA-NEXT:    l32r a8, .LCPI31_0
-; XTENSA-NEXT:    callx0 a8
-; XTENSA-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
-; XTENSA-NEXT:    addi a8, a1, 16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -80
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    s32i a0, a1, 64 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a12, a1, 60 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a13, a1, 56 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a14, a1, 52 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 48 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a5, a1, 20 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a4, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a15, a3, a3
+; XTENSA-NEXT:    movi a14, -63
+; XTENSA-NEXT:    movi a12, 0
+; XTENSA-NEXT:    l32r a13, .LCPI31_0
+; XTENSA-NEXT:    s32i a2, a1, 36 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a2, a1, 28 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a3, a1, 44 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 40 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a2, a15, a15
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    s32i a14, a1, 12 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    l32i a8, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a15, a2, a8
+; XTENSA-NEXT:    movi a8, 1
+; XTENSA-NEXT:    s32i a8, a1, 44 # 4-byte Folded Spill
+; XTENSA-NEXT:    bltu a15, a2, .LBB31_2
+; XTENSA-NEXT:  # %bb.1:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB31_2:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    s32i a8, a1, 32 # 4-byte Folded Spill
+; XTENSA-NEXT:    movi a14, -1
+; XTENSA-NEXT:    l32i a2, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    add a9, a2, a15
+; XTENSA-NEXT:    l32i a8, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    s32i a9, a1, 24 # 4-byte Folded Spill
+; XTENSA-NEXT:    bltu a9, a2, .LBB31_4
+; XTENSA-NEXT:  # %bb.3:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB31_4:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    l32i a9, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a15, a9, a8
+; XTENSA-NEXT:    l32i a2, a1, 40 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a3, a1, 4 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a2, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT:    add a15, a2, a15
+; XTENSA-NEXT:    l32i a2, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 20 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a4, a1, 12 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a5, a14, a14
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a2, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a3, a1, 20 # 4-byte Folded Spill
+; XTENSA-NEXT:    l32i a2, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 40 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a14, a14
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    l32i a8, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a9, a2, a8
+; XTENSA-NEXT:    add a4, a15, a9
+; XTENSA-NEXT:    l32i a7, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a8, a7, a7
+; XTENSA-NEXT:    bltu a4, a15, .LBB31_6
+; XTENSA-NEXT:  # %bb.5:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB31_6:
+; XTENSA-NEXT:    or a10, a7, a7
+; XTENSA-NEXT:    l32i a11, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a15, a11, .LBB31_8
+; XTENSA-NEXT:  # %bb.7:
+; XTENSA-NEXT:    or a10, a12, a12
+; XTENSA-NEXT:  .LBB31_8:
+; XTENSA-NEXT:    or a11, a7, a7
+; XTENSA-NEXT:    l32i a6, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a5, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a5, a6, .LBB31_10
+; XTENSA-NEXT:  # %bb.9:
+; XTENSA-NEXT:    or a11, a12, a12
+; XTENSA-NEXT:  .LBB31_10:
+; XTENSA-NEXT:    l32i a6, a1, 4 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a11, a6, a11
+; XTENSA-NEXT:    add a10, a11, a10
+; XTENSA-NEXT:    bltu a9, a2, .LBB31_12
+; XTENSA-NEXT:  # %bb.11:
+; XTENSA-NEXT:    or a7, a12, a12
+; XTENSA-NEXT:  .LBB31_12:
+; XTENSA-NEXT:    l32i a9, a1, 20 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a9, a3, a9
+; XTENSA-NEXT:    add a9, a9, a7
+; XTENSA-NEXT:    add a9, a10, a9
+; XTENSA-NEXT:    add a5, a9, a8
+; XTENSA-NEXT:    l32i a2, a1, 28 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a3, a1, 24 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a15, a1, 48 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a14, a1, 52 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a13, a1, 56 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a12, a1, 60 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a0, a1, 64 # 4-byte Folded Reload
+; XTENSA-NEXT:    addi a8, a1, 80
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    ret
   %1 = mul i128 %a, -63
@@ -560,22 +802,119 @@ define i128 @muli128_m63(i128 %a) nounwind {
 
 define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; XTENSA-LABEL: mulhsu_i64:
-; XTENSA:         addi a8, a1, -16
-; XTENSA-NEXT:    or a1, a8, a8
-; XTENSA-NEXT:    s32i a0, a1, 8 # 4-byte Folded Spill
-; XTENSA-NEXT:    or a7, a5, a5
-; XTENSA-NEXT:    or a6, a4, a4
-; XTENSA-NEXT:    srai a8, a7, 31
-; XTENSA-NEXT:    s32i a8, a1, 4
-; XTENSA-NEXT:    s32i a8, a1, 0
-; XTENSA-NEXT:    movi a4, 0
-; XTENSA-NEXT:    l32r a8, .LCPI32_0
-; XTENSA-NEXT:    or a5, a4, a4
-; XTENSA-NEXT:    callx0 a8
-; XTENSA-NEXT:    or a2, a4, a4
-; XTENSA-NEXT:    or a3, a5, a5
-; XTENSA-NEXT:    l32i a0, a1, 8 # 4-byte Folded Reload
-; XTENSA-NEXT:    addi a8, a1, 16
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addi a8, a1, -64
+; XTENSA-NEXT:    or a1, a8, a8
+; XTENSA-NEXT:    s32i a0, a1, 56 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a12, a1, 52 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a13, a1, 48 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a14, a1, 44 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 40 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a5, a1, 28 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a14, a4, a4
+; XTENSA-NEXT:    or a15, a3, a3
+; XTENSA-NEXT:    movi a12, 0
+; XTENSA-NEXT:    l32r a13, .LCPI32_0
+; XTENSA-NEXT:    s32i a2, a1, 32 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a3, a1, 24 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a15, a1, 36 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a2, a15, a15
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    s32i a14, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a4, a14, a14
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    l32i a8, a1, 24 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a14, a2, a8
+; XTENSA-NEXT:    movi a15, 1
+; XTENSA-NEXT:    or a8, a15, a15
+; XTENSA-NEXT:    bltu a14, a2, .LBB32_2
+; XTENSA-NEXT:  # %bb.1:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB32_2:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    s32i a8, a1, 24 # 4-byte Folded Spill
+; XTENSA-NEXT:    l32i a2, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    l32i a4, a1, 28 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    add a9, a2, a14
+; XTENSA-NEXT:    s32i a15, a1, 20 # 4-byte Folded Spill
+; XTENSA-NEXT:    or a8, a15, a15
+; XTENSA-NEXT:    bltu a9, a2, .LBB32_4
+; XTENSA-NEXT:  # %bb.3:
+; XTENSA-NEXT:    or a8, a12, a12
+; XTENSA-NEXT:  .LBB32_4:
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    l32i a9, a1, 24 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a14, a9, a8
+; XTENSA-NEXT:    l32i a2, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a12, a12
+; XTENSA-NEXT:    l32i a15, a1, 28 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a4, a15, a15
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a3, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a14, a1, 12 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a2, a1, 4 # 4-byte Folded Spill
+; XTENSA-NEXT:    add a14, a2, a14
+; XTENSA-NEXT:    l32i a2, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a3, a15, a15
+; XTENSA-NEXT:    or a4, a12, a12
+; XTENSA-NEXT:    or a5, a12, a12
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    s32i a2, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT:    s32i a3, a1, 16 # 4-byte Folded Spill
+; XTENSA-NEXT:    srai a2, a15, 31
+; XTENSA-NEXT:    or a3, a2, a2
+; XTENSA-NEXT:    l32i a4, a1, 32 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a5, a1, 36 # 4-byte Folded Reload
+; XTENSA-NEXT:    callx0 a13
+; XTENSA-NEXT:    or a8, a2, a2
+; XTENSA-NEXT:    l32i a9, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a10, a8, a9
+; XTENSA-NEXT:    add a2, a14, a10
+; XTENSA-NEXT:    l32i a6, a1, 20 # 4-byte Folded Reload
+; XTENSA-NEXT:    or a9, a6, a6
+; XTENSA-NEXT:    bltu a2, a14, .LBB32_6
+; XTENSA-NEXT:  # %bb.5:
+; XTENSA-NEXT:    or a9, a12, a12
+; XTENSA-NEXT:  .LBB32_6:
+; XTENSA-NEXT:    or a11, a6, a6
+; XTENSA-NEXT:    l32i a7, a1, 4 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a14, a7, .LBB32_8
+; XTENSA-NEXT:  # %bb.7:
+; XTENSA-NEXT:    or a11, a12, a12
+; XTENSA-NEXT:  .LBB32_8:
+; XTENSA-NEXT:    or a7, a6, a6
+; XTENSA-NEXT:    l32i a5, a1, 24 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a4, a1, 12 # 4-byte Folded Reload
+; XTENSA-NEXT:    bltu a4, a5, .LBB32_10
+; XTENSA-NEXT:  # %bb.9:
+; XTENSA-NEXT:    or a7, a12, a12
+; XTENSA-NEXT:  .LBB32_10:
+; XTENSA-NEXT:    l32i a5, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a7, a5, a7
+; XTENSA-NEXT:    add a11, a7, a11
+; XTENSA-NEXT:    bltu a10, a8, .LBB32_12
+; XTENSA-NEXT:  # %bb.11:
+; XTENSA-NEXT:    or a6, a12, a12
+; XTENSA-NEXT:  .LBB32_12:
+; XTENSA-NEXT:    l32i a8, a1, 16 # 4-byte Folded Reload
+; XTENSA-NEXT:    add a8, a3, a8
+; XTENSA-NEXT:    add a8, a8, a6
+; XTENSA-NEXT:    add a8, a11, a8
+; XTENSA-NEXT:    add a3, a8, a9
+; XTENSA-NEXT:    l32i a15, a1, 40 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a14, a1, 44 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a13, a1, 48 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a12, a1, 52 # 4-byte Folded Reload
+; XTENSA-NEXT:    l32i a0, a1, 56 # 4-byte Folded Reload
+; XTENSA-NEXT:    addi a8, a1, 64
 ; XTENSA-NEXT:    or a1, a8, a8
 ; XTENSA-NEXT:    ret
   %1 = zext i64 %a to i128
@@ -588,7 +927,8 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 
 define i8 @muladd_demand(i8 %x, i8 %y) nounwind {
 ; XTENSA-LABEL: muladd_demand:
-; XTENSA:         slli a8, a2, 1
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a8, a2, 1
 ; XTENSA-NEXT:    sub a8, a3, a8
 ; XTENSA-NEXT:    movi a9, 15
 ; XTENSA-NEXT:    and a2, a8, a9
@@ -601,7 +941,8 @@ define i8 @muladd_demand(i8 %x, i8 %y) nounwind {
 
 define i8 @mulsub_demand(i8 %x, i8 %y) nounwind {
 ; XTENSA-LABEL: mulsub_demand:
-; XTENSA:         addx2 a8, a2, a3
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addx2 a8, a2, a3
 ; XTENSA-NEXT:    movi a9, 15
 ; XTENSA-NEXT:    and a2, a8, a9
 ; XTENSA-NEXT:    ret
@@ -613,7 +954,8 @@ define i8 @mulsub_demand(i8 %x, i8 %y) nounwind {
 
 define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind {
 ; XTENSA-LABEL: muladd_demand_2:
-; XTENSA:         slli a8, a2, 1
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a8, a2, 1
 ; XTENSA-NEXT:    sub a8, a3, a8
 ; XTENSA-NEXT:    movi a9, -16
 ; XTENSA-NEXT:    or a2, a8, a9
@@ -626,7 +968,8 @@ define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind {
 
 define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind {
 ; XTENSA-LABEL: mulsub_demand_2:
-; XTENSA:         addx2 a8, a2, a3
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    addx2 a8, a2, a3
 ; XTENSA-NEXT:    movi a9, -16
 ; XTENSA-NEXT:    or a2, a8, a9
 ; XTENSA-NEXT:    ret
@@ -638,7 +981,8 @@ define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind {
 
 define signext i32 @mul_imm_2(i32 %a) nounwind {
 ; XTENSA-LABEL: mul_imm_2:
-; XTENSA:         slli a2, a2, 1
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a2, a2, 1
 ; XTENSA-NEXT:    ret
   %1 = mul i32 %a, 2
   ret i32 %1
@@ -646,7 +990,8 @@ define signext i32 @mul_imm_2(i32 %a) nounwind {
 
 define signext i32 @mul_imm_1024(i32 %a) nounwind {
 ; XTENSA-LABEL: mul_imm_1024:
-; XTENSA:         slli a2, a2, 10
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a2, a2, 10
 ; XTENSA-NEXT:    ret
   %1 = mul i32 %a, 1024
   ret i32 %1
@@ -654,7 +999,8 @@ define signext i32 @mul_imm_1024(i32 %a) nounwind {
 
 define signext i32 @mul_imm_16384(i32 %a) nounwind {
 ; XTENSA-LABEL: mul_imm_16384:
-; XTENSA:         slli a2, a2, 14
+; XTENSA:       # %bb.0:
+; XTENSA-NEXT:    slli a2, a2, 14
 ; XTENSA-NEXT:    ret
   %1 = mul i32 %a, 16384
   ret i32 %1
@@ -662,7 +1008,9 @@ define signext i32 @mul_imm_16384(i32 %a) nounwind {
 
 define <4 x i32> @mul_vec_splat_constant(<4 x i32> %a) {
 ; XTENSA-LABEL: mul_vec_splat_constant:
-; XTENSA:         slli a2, a2, 2
+; XTENSA:         .cfi_startproc
+; XTENSA-NEXT:  # %bb.0:
+; XTENSA-NEXT:    slli a2, a2, 2
 ; XTENSA-NEXT:    slli a3, a3, 2
 ; XTENSA-NEXT:    slli a4, a4, 2
 ; XTENSA-NEXT:    slli a5, a5, 2
diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index f05d8b89e73b93..7d4fb7d8e15045 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -222,6 +222,11 @@ class string:
     flags=(re.M | re.S),
 )
 
+ASM_FUNCTION_XTENSA_RE = re.compile(
+    r"^(?P<func>[^:]+): +# @(?P=func)\n(?P<body>.*?)\n\.Lfunc_end\d+:\n",
+    flags=(re.M | re.S),
+)
+
 ASM_FUNCTION_CSKY_RE = re.compile(
     r"^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n(?:\s*\.?Lfunc_begin[^:\n]*:\n)?[^:]*?"
     r"(?P<body>^##?[ \t]+[^:]+:.*?)\s*"
@@ -492,6 +497,17 @@ def scrub_asm_ve(asm, args):
     return asm
 
 
+def scrub_asm_xtensa(asm, args):
+    # Scrub runs of whitespace out of the assembly, but leave the leading
+    # whitespace in place.
+    asm = common.SCRUB_WHITESPACE_RE.sub(r" ", asm)
+    # Expand the tabs used for indentation.
+    asm = string.expandtabs(asm, 2)
+    # Strip trailing whitespace.
+    asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r"", asm)
+    return asm
+
+
 def scrub_asm_csky(asm, args):
     # Scrub runs of whitespace out of the assembly, but leave the leading
     # whitespace in place.
@@ -576,6 +592,7 @@ def get_run_handler(triple):
         "wasm32": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE),
         "wasm64": (scrub_asm_wasm, ASM_FUNCTION_WASM_RE),
         "ve": (scrub_asm_ve, ASM_FUNCTION_VE_RE),
+        "xtensa": (scrub_asm_xtensa, ASM_FUNCTION_XTENSA_RE),
         "csky": (scrub_asm_csky, ASM_FUNCTION_CSKY_RE),
         "nvptx": (scrub_asm_nvptx, ASM_FUNCTION_NVPTX_RE),
         "loongarch32": (scrub_asm_loongarch, ASM_FUNCTION_LOONGARCH_RE),

>From b122e0e3af92d381b2e4392d6baf393538ad6672 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Sat, 23 Nov 2024 10:29:11 -0800
Subject: [PATCH 09/12] [Sema] Fix a warning

This patch fixes:

  clang/lib/Sema/SemaHLSL.cpp:2225:32: error: absolute value function
  'abs' given an argument of type 'int64_t' (aka 'long') but has
  parameter of type 'int' which may cause truncation of value
  [-Werror,-Wabsolute-value]
---
 clang/lib/Sema/SemaHLSL.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 289d9dc0f11306..8bdacd6ded1654 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2222,7 +2222,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     Expr *OffsetExpr = TheCall->getArg(1);
     std::optional<llvm::APSInt> Offset =
         OffsetExpr->getIntegerConstantExpr(SemaRef.getASTContext());
-    if (!Offset.has_value() || abs(Offset->getExtValue()) != 1) {
+    if (!Offset.has_value() || std::abs(Offset->getExtValue()) != 1) {
       SemaRef.Diag(TheCall->getArg(1)->getBeginLoc(),
                    diag::err_hlsl_expect_arg_const_int_one_or_neg_one)
           << 1;

>From 25e62f095e8351fe610a82b08e91e15288c13b06 Mon Sep 17 00:00:00 2001
From: "M. Zeeshan Siddiqui" <mzs at microsoft.com>
Date: Sat, 23 Nov 2024 10:37:29 -0800
Subject: [PATCH 10/12] Allow SymbolUserOpInterface operators to be used in
 RemoveDeadValues Pass (#117405)

This change removes the restriction on operations implementing
`SymbolUserOpInterface`, so they can now appear alongside operations
that implement `SymbolOpInterface`. For example:

`memref.global` implements `SymbolOpInterface`, so it can be used with
`memref.get_global`, which implements `SymbolUserOpInterface`:

```
// Define a global constant array
memref.global "private" constant @global_array : memref<10xi32> = dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : tensor<10xi32>

// Access this global constant within a function
func @use_global() {
  %0 = memref.get_global @global_array : memref<10xi32>
}
```

Reference: https://github.com/llvm/llvm-project/pull/116519 and
https://discourse.llvm.org/t/question-on-criteria-for-acceptable-ir-in-removedeadvaluespass/83131

---------

Co-authored-by: Zeeshan Siddiqui <mzs at ntdev.microsoft.com>
---
 mlir/lib/Transforms/RemoveDeadValues.cpp     | 6 ++----
 mlir/test/Transforms/remove-dead-values.mlir | 7 +++++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Transforms/RemoveDeadValues.cpp b/mlir/lib/Transforms/RemoveDeadValues.cpp
index b82280dda8ba73..0aa9dcb36681b3 100644
--- a/mlir/lib/Transforms/RemoveDeadValues.cpp
+++ b/mlir/lib/Transforms/RemoveDeadValues.cpp
@@ -577,10 +577,8 @@ void RemoveDeadValues::runOnOperation() {
   WalkResult acceptableIR = module->walk([&](Operation *op) {
     if (op == module)
       return WalkResult::advance();
-    if (isa<BranchOpInterface>(op) ||
-        (isa<SymbolUserOpInterface>(op) && !isa<CallOpInterface>(op))) {
-      op->emitError() << "cannot optimize an IR with "
-                         "non-call symbol user ops or branch ops\n";
+    if (isa<BranchOpInterface>(op)) {
+      op->emitError() << "cannot optimize an IR with branch ops\n";
       return WalkResult::interrupt();
     }
     return WalkResult::advance();
diff --git a/mlir/test/Transforms/remove-dead-values.mlir b/mlir/test/Transforms/remove-dead-values.mlir
index 47137fc6430fea..826f6159a36b67 100644
--- a/mlir/test/Transforms/remove-dead-values.mlir
+++ b/mlir/test/Transforms/remove-dead-values.mlir
@@ -3,9 +3,12 @@
 // The IR is updated regardless of memref.global private constant
 //
 module {
-  memref.global "private" constant @__something_global : memref<i32> = dense<0>
+  // CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64}
+  memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64}
   func.func @main(%arg0: i32) -> i32 {
     %0 = tensor.empty() : tensor<10xbf16>
+    // CHECK-NOT: memref.get_global
+    %1 = memref.get_global @__constant_4xi32 : memref<4xi32>
     // CHECK-NOT: tensor.empty
     return %arg0 : i32
   }
@@ -29,7 +32,7 @@ module @named_module_acceptable {
 //
 func.func @dont_touch_unacceptable_ir_has_cleanable_simple_op_with_branch_op(%arg0: i1) {
   %non_live = arith.constant 0 : i32
-  // expected-error @+1 {{cannot optimize an IR with non-call symbol user ops or branch ops}}
+  // expected-error @+1 {{cannot optimize an IR with branch ops}}
   cf.cond_br %arg0, ^bb1(%non_live : i32), ^bb2(%non_live : i32)
 ^bb1(%non_live_0 : i32):
   cf.br ^bb3

>From 240018ed552e87a9d55184e3217b7b3d5ce29f89 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 23 Nov 2024 18:58:55 +0000
Subject: [PATCH 11/12] [AArch64][GlobalISel] Update and cleanup a number of
 gisel tests. NFC

Mostly removing unnecessary -global-isel-abort=2 or adding fallback messages
---
 llvm/test/CodeGen/AArch64/abs.ll              |    2 +-
 llvm/test/CodeGen/AArch64/arm64-clrsb.ll      |   56 +-
 .../test/CodeGen/AArch64/arm64-sli-sri-opt.ll |  617 +++--
 llvm/test/CodeGen/AArch64/arm64-vclz.ll       |  230 +-
 llvm/test/CodeGen/AArch64/concat-vector.ll    |    2 +-
 .../CodeGen/AArch64/extract-subvec-combine.ll |    2 +-
 .../CodeGen/AArch64/extract-vector-elt-sve.ll |    7 +
 llvm/test/CodeGen/AArch64/fcvt-fixed.ll       | 1888 +++++++++----
 .../AArch64/fixed-vector-deinterleave.ll      |    2 +-
 .../CodeGen/AArch64/fp-intrinsics-fp16.ll     |   77 +-
 .../CodeGen/AArch64/fp-intrinsics-vector.ll   |   85 +-
 llvm/test/CodeGen/AArch64/fp-intrinsics.ll    | 2340 +++++++++++++----
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll |    4 +-
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll |    4 +-
 llvm/test/CodeGen/AArch64/funnel-shift.ll     |    2 +-
 llvm/test/CodeGen/AArch64/itofp-bf16.ll       |   57 +
 llvm/test/CodeGen/AArch64/mingw-refptr.ll     |   92 +-
 llvm/test/CodeGen/AArch64/mulcmle.ll          |    2 +-
 llvm/test/CodeGen/AArch64/overflow.ll         |   55 +-
 llvm/test/CodeGen/AArch64/phi.ll              |    2 +-
 llvm/test/CodeGen/AArch64/sadd_sat.ll         |    2 +-
 llvm/test/CodeGen/AArch64/sadd_sat_plus.ll    |    2 +-
 llvm/test/CodeGen/AArch64/sadd_sat_vec.ll     |    4 +
 llvm/test/CodeGen/AArch64/sext.ll             |    2 +-
 llvm/test/CodeGen/AArch64/ssub_sat.ll         |    2 +-
 llvm/test/CodeGen/AArch64/ssub_sat_plus.ll    |    2 +-
 llvm/test/CodeGen/AArch64/ssub_sat_vec.ll     |    4 +
 llvm/test/CodeGen/AArch64/uadd_sat.ll         |    2 +-
 llvm/test/CodeGen/AArch64/uadd_sat_plus.ll    |    2 +-
 llvm/test/CodeGen/AArch64/uadd_sat_vec.ll     |    4 +
 llvm/test/CodeGen/AArch64/usub_sat.ll         |    2 +-
 llvm/test/CodeGen/AArch64/usub_sat_plus.ll    |    2 +-
 llvm/test/CodeGen/AArch64/usub_sat_vec.ll     |    4 +
 .../AArch64/vecreduce-umax-legalization.ll    |    2 +-
 34 files changed, 4196 insertions(+), 1366 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 25a14ef9a49ee8..d501d9ed24547a 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; ===== Legal Scalars =====
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
index 412c2b00a5ac09..9c54238c68e2c6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll
@@ -1,78 +1,68 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 |  FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -O0 -pass-remarks-missed=gisel* -global-isel-abort=2 |  FileCheck %s --check-prefixes=GISEL,FALLBACK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 |  FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0.0 -global-isel |  FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
-; Function Attrs: nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) #0
 declare i64 @llvm.ctlz.i64(i64, i1) #1
 
-; Function Attrs: nounwind ssp
-; FALLBACK-NOT: remark{{.*}}clrsb32
 define i32 @clrsb32(i32 %x) #2 {
+; CHECK-LABEL: clrsb32:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
 entry:
   %shr = ashr i32 %x, 31
   %xor = xor i32 %shr, %x
   %mul = shl i32 %xor, 1
   %add = or i32 %mul, 1
   %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 false)
-
   ret i32 %0
-; CHECK-LABEL: clrsb32
-; CHECK:   cls [[TEMP:w[0-9]+]], [[TEMP]]
-
-; GISEL-LABEL: clrsb32
-; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]]
 }
 
-; Function Attrs: nounwind ssp
-; FALLBACK-NOT: remark{{.*}}clrsb64
 define i64 @clrsb64(i64 %x) #3 {
+; CHECK-LABEL: clrsb64:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    cls x0, x0
+; CHECK-NEXT:    ret
 entry:
   %shr = ashr i64 %x, 63
   %xor = xor i64 %shr, %x
   %mul = shl nsw i64 %xor, 1
   %add = or i64 %mul, 1
   %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 false)
-
   ret i64 %0
-; CHECK-LABEL: clrsb64
-; CHECK:   cls [[TEMP:x[0-9]+]], [[TEMP]]
-; GISEL-LABEL: clrsb64
-; GISEL:   cls [[TEMP:x[0-9]+]], [[TEMP]]
 }
 
-; Function Attrs: nounwind ssp
-; FALLBACK-NOT: remark{{.*}}clrsb32_zeroundef
 define i32 @clrsb32_zeroundef(i32 %x) #2 {
+; CHECK-LABEL: clrsb32_zeroundef:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    cls w0, w0
+; CHECK-NEXT:    ret
 entry:
   %shr = ashr i32 %x, 31
   %xor = xor i32 %shr, %x
   %mul = shl i32 %xor, 1
   %add = or i32 %mul, 1
   %0 = tail call i32 @llvm.ctlz.i32(i32 %add, i1 true)
-
   ret i32 %0
-; CHECK-LABEL: clrsb32_zeroundef
-; CHECK:   cls [[TEMP:w[0-9]+]], [[TEMP]]
-
-; GISEL-LABEL: clrsb32_zeroundef
-; GISEL: cls [[TEMP:w[0-9]+]], [[TEMP]]
 }
 
-; Function Attrs: nounwind ssp
-; FALLBACK-NOT: remark{{.*}}clrsb64
 define i64 @clrsb64_zeroundef(i64 %x) #3 {
+; CHECK-LABEL: clrsb64_zeroundef:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    cls x0, x0
+; CHECK-NEXT:    ret
 entry:
   %shr = ashr i64 %x, 63
   %xor = xor i64 %shr, %x
   %mul = shl nsw i64 %xor, 1
   %add = or i64 %mul, 1
   %0 = tail call i64 @llvm.ctlz.i64(i64 %add, i1 true)
-
   ret i64 %0
-; CHECK-LABEL: clrsb64_zeroundef
-; CHECK:   cls [[TEMP:x[0-9]+]], [[TEMP]]
-; GISEL-LABEL: clrsb64_zeroundef
-; GISEL:   cls [[TEMP:x[0-9]+]], [[TEMP]]
 }
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 475affa358bd15..0e1e15f9b6b912 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -1,12 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood8x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.8b v0, v1, #3
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood8x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.8b v0, v1, #3
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood8x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.8b v2, #7
+; CHECK-GI-NEXT:    shl.8b v1, v1, #3
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   %vshl_n = shl <8 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <8 x i8> %and.i, %vshl_n
@@ -15,14 +25,23 @@ define void @testLeftGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind
 }
 
 define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad8x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi.8b v2, #165
-; CHECK-NEXT:    add.8b v1, v1, v1
-; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad8x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi.8b v2, #165
+; CHECK-SD-NEXT:    add.8b v1, v1, v1
+; CHECK-SD-NEXT:    and.8b v0, v0, v2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad8x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.8b v2, #165
+; CHECK-GI-NEXT:    shl.8b v1, v1, #1
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
   %vshl_n = shl <8 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %result = or <8 x i8> %and.i, %vshl_n
@@ -31,11 +50,20 @@ define void @testLeftBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind
 }
 
 define void @testRightGood8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood8x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.8b v0, v1, #3
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood8x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.8b v0, v1, #3
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood8x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.8b v2, #224
+; CHECK-GI-NEXT:    ushr.8b v1, v1, #3
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
   %vshl_n = lshr <8 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <8 x i8> %and.i, %vshl_n
@@ -60,11 +88,20 @@ define void @testRightBad8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind
 }
 
 define void @testLeftGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood16x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.16b v0, v1, #3
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood16x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.16b v0, v1, #3
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood16x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.16b v2, #7
+; CHECK-GI-NEXT:    shl.16b v1, v1, #3
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <16 x i8> %src1, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
   %vshl_n = shl <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <16 x i8> %and.i, %vshl_n
@@ -73,14 +110,23 @@ define void @testLeftGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad16x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi.16b v2, #165
-; CHECK-NEXT:    add.16b v1, v1, v1
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad16x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi.16b v2, #165
+; CHECK-SD-NEXT:    add.16b v1, v1, v1
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad16x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.16b v2, #165
+; CHECK-GI-NEXT:    shl.16b v1, v1, #1
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <16 x i8> %src1, <i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165, i8 165>
   %vshl_n = shl <16 x i8> %src2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %result = or <16 x i8> %and.i, %vshl_n
@@ -89,11 +135,20 @@ define void @testLeftBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood16x8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.16b v0, v1, #3
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood16x8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.16b v0, v1, #3
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood16x8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.16b v2, #224
+; CHECK-GI-NEXT:    ushr.16b v1, v1, #3
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <16 x i8> %src1, <i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224, i8 224>
   %vshl_n = lshr <16 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   %result = or <16 x i8> %and.i, %vshl_n
@@ -118,11 +173,20 @@ define void @testRightBad16x8(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) nounw
 }
 
 define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood4x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.4h v0, v1, #14
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood4x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.4h v0, v1, #14
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood4x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.4h v2, #192, lsl #8
+; CHECK-GI-NEXT:    shl.4h v1, v1, #14
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 16383, i16 16383, i16 16383, i16 16383>
   %vshl_n = shl <4 x i16> %src2, <i16 14, i16 14, i16 14, i16 14>
   %result = or <4 x i16> %and.i, %vshl_n
@@ -131,15 +195,25 @@ define void @testLeftGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad4x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    shl.4h v1, v1, #14
-; CHECK-NEXT:    dup.4h v2, w8
-; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad4x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-SD-NEXT:    shl.4h v1, v1, #14
+; CHECK-SD-NEXT:    dup.4h v2, w8
+; CHECK-SD-NEXT:    and.8b v0, v0, v2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad4x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
+; CHECK-GI-NEXT:    shl.4h v1, v1, #14
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
   %vshl_n = shl <4 x i16> %src2, <i16 14, i16 14, i16 14, i16 14>
   %result = or <4 x i16> %and.i, %vshl_n
@@ -148,11 +222,20 @@ define void @testLeftBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood4x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.4h v0, v1, #14
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood4x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.4h v0, v1, #14
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood4x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.4h v2, #3
+; CHECK-GI-NEXT:    ushr.4h v1, v1, #14
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 65532, i16 65532, i16 65532, i16 65532>
   %vshl_n = lshr <4 x i16> %src2, <i16 14, i16 14, i16 14, i16 14>
   %result = or <4 x i16> %and.i, %vshl_n
@@ -161,14 +244,24 @@ define void @testRightGood4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) noun
 }
 
 define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightBad4x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    dup.4h v2, w8
-; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    usra.4h v0, v1, #14
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightBad4x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-SD-NEXT:    dup.4h v2, w8
+; CHECK-SD-NEXT:    and.8b v0, v0, v2
+; CHECK-SD-NEXT:    usra.4h v0, v1, #14
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightBad4x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
+; CHECK-GI-NEXT:    ushr.4h v1, v1, #14
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500>
   %vshl_n = lshr <4 x i16> %src2, <i16 14, i16 14, i16 14, i16 14>
   %result = or <4 x i16> %and.i, %vshl_n
@@ -177,11 +270,20 @@ define void @testRightBad4x16(<4 x i16> %src1, <4 x i16> %src2, ptr %dest) nounw
 }
 
 define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood8x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.8h v0, v1, #14
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood8x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.8h v0, v1, #14
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood8x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.8h v2, #192, lsl #8
+; CHECK-GI-NEXT:    shl.8h v1, v1, #14
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
   %vshl_n = shl <8 x i16> %src2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
   %result = or <8 x i16> %and.i, %vshl_n
@@ -190,15 +292,25 @@ define void @testLeftGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad8x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    shl.8h v1, v1, #14
-; CHECK-NEXT:    dup.8h v2, w8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad8x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-SD-NEXT:    shl.8h v1, v1, #14
+; CHECK-SD-NEXT:    dup.8h v2, w8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad8x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
+; CHECK-GI-NEXT:    shl.8h v1, v1, #14
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>
   %vshl_n = shl <8 x i16> %src2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
   %result = or <8 x i16> %and.i, %vshl_n
@@ -207,11 +319,20 @@ define void @testLeftBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood8x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.8h v0, v1, #14
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood8x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.8h v0, v1, #14
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood8x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.8h v2, #3
+; CHECK-GI-NEXT:    ushr.8h v1, v1, #14
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 65532, i16 65532, i16 65532, i16 65532, i16 65532, i16 65532, i16 65532, i16 65532>
   %vshl_n = lshr <8 x i16> %src2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
   %result = or <8 x i16> %and.i, %vshl_n
@@ -220,14 +341,24 @@ define void @testRightGood8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) noun
 }
 
 define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightBad8x16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #16500
-; CHECK-NEXT:    dup.8h v2, w8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    usra.8h v0, v1, #14
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightBad8x16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-SD-NEXT:    dup.8h v2, w8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    usra.8h v0, v1, #14
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightBad8x16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
+; CHECK-GI-NEXT:    ushr.8h v1, v1, #14
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <8 x i16> %src1, <i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500, i16 16500>
   %vshl_n = lshr <8 x i16> %src2, <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>
   %result = or <8 x i16> %and.i, %vshl_n
@@ -236,11 +367,20 @@ define void @testRightBad8x16(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) nounw
 }
 
 define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood2x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.2s v0, v1, #22
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood2x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.2s v0, v1, #22
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood2x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.2s v2, #63, msl #16
+; CHECK-GI-NEXT:    shl.2s v1, v1, #22
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i32> %src1, <i32 4194303, i32 4194303>
   %vshl_n = shl <2 x i32> %src2, <i32 22, i32 22>
   %result = or <2 x i32> %and.i, %vshl_n
@@ -249,15 +389,25 @@ define void @testLeftGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad2x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #4194300
-; CHECK-NEXT:    shl.2s v1, v1, #22
-; CHECK-NEXT:    dup.2s v2, w8
-; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad2x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #4194300 // =0x3ffffc
+; CHECK-SD-NEXT:    shl.2s v1, v1, #22
+; CHECK-SD-NEXT:    dup.2s v2, w8
+; CHECK-SD-NEXT:    and.8b v0, v0, v2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad2x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
+; CHECK-GI-NEXT:    shl.2s v1, v1, #22
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i32> %src1, <i32 4194300, i32 4194300>
   %vshl_n = shl <2 x i32> %src2, <i32 22, i32 22>
   %result = or <2 x i32> %and.i, %vshl_n
@@ -266,11 +416,20 @@ define void @testLeftBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood2x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.2s v0, v1, #22
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood2x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.2s v0, v1, #22
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood2x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.2s v2, #3, msl #8
+; CHECK-GI-NEXT:    ushr.2s v1, v1, #22
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i32> %src1, <i32 4294966272, i32 4294966272>
   %vshl_n = lshr <2 x i32> %src2, <i32 22, i32 22>
   %result = or <2 x i32> %and.i, %vshl_n
@@ -279,15 +438,25 @@ define void @testRightGood2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) noun
 }
 
 define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightBad2x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #4194300
-; CHECK-NEXT:    ushr.2s v1, v1, #22
-; CHECK-NEXT:    dup.2s v2, w8
-; CHECK-NEXT:    and.8b v0, v0, v2
-; CHECK-NEXT:    orr.8b v0, v0, v1
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightBad2x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #4194300 // =0x3ffffc
+; CHECK-SD-NEXT:    ushr.2s v1, v1, #22
+; CHECK-SD-NEXT:    dup.2s v2, w8
+; CHECK-SD-NEXT:    and.8b v0, v0, v2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightBad2x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
+; CHECK-GI-NEXT:    ushr.2s v1, v1, #22
+; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT:    and.8b v0, v0, v2
+; CHECK-GI-NEXT:    orr.8b v0, v0, v1
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i32> %src1, <i32 4194300, i32 4194300>
   %vshl_n = lshr <2 x i32> %src2, <i32 22, i32 22>
   %result = or <2 x i32> %and.i, %vshl_n
@@ -296,11 +465,20 @@ define void @testRightBad2x32(<2 x i32> %src1, <2 x i32> %src2, ptr %dest) nounw
 }
 
 define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood4x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.4s v0, v1, #22
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood4x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.4s v0, v1, #22
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood4x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.4s v2, #63, msl #16
+; CHECK-GI-NEXT:    shl.4s v1, v1, #22
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i32> %src1, <i32 4194303, i32 4194303, i32 4194303, i32 4194303>
   %vshl_n = shl <4 x i32> %src2, <i32 22, i32 22, i32 22, i32 22>
   %result = or <4 x i32> %and.i, %vshl_n
@@ -309,15 +487,25 @@ define void @testLeftGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad4x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #4194300
-; CHECK-NEXT:    shl.4s v1, v1, #22
-; CHECK-NEXT:    dup.4s v2, w8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad4x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #4194300 // =0x3ffffc
+; CHECK-SD-NEXT:    shl.4s v1, v1, #22
+; CHECK-SD-NEXT:    dup.4s v2, w8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad4x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-NEXT:    shl.4s v1, v1, #22
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i32> %src1, <i32 4194300, i32 4194300, i32 4194300, i32 4194300>
   %vshl_n = shl <4 x i32> %src2, <i32 22, i32 22, i32 22, i32 22>
   %result = or <4 x i32> %and.i, %vshl_n
@@ -326,11 +514,20 @@ define void @testLeftBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood4x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.4s v0, v1, #22
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood4x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.4s v0, v1, #22
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood4x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mvni.4s v2, #3, msl #8
+; CHECK-GI-NEXT:    ushr.4s v1, v1, #22
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i32> %src1, <i32 4294966272, i32 4294966272, i32 4294966272, i32 4294966272>
   %vshl_n = lshr <4 x i32> %src2, <i32 22, i32 22, i32 22, i32 22>
   %result = or <4 x i32> %and.i, %vshl_n
@@ -339,15 +536,25 @@ define void @testRightGood4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) noun
 }
 
 define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightBad4x32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #4194300
-; CHECK-NEXT:    ushr.4s v1, v1, #22
-; CHECK-NEXT:    dup.4s v2, w8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightBad4x32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov w8, #4194300 // =0x3ffffc
+; CHECK-SD-NEXT:    ushr.4s v1, v1, #22
+; CHECK-SD-NEXT:    dup.4s v2, w8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightBad4x32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
+; CHECK-GI-NEXT:    ushr.4s v1, v1, #22
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <4 x i32> %src1, <i32 4194300, i32 4194300, i32 4194300, i32 4194300>
   %vshl_n = lshr <4 x i32> %src2, <i32 22, i32 22, i32 22, i32 22>
   %result = or <4 x i32> %and.i, %vshl_n
@@ -356,11 +563,20 @@ define void @testRightBad4x32(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) nounw
 }
 
 define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftGood2x64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sli.2d v0, v1, #48
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftGood2x64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sli.2d v0, v1, #48
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftGood2x64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.2d v2, #0x00ffffffffffff
+; CHECK-GI-NEXT:    shl.2d v1, v1, #48
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i64> %src1, <i64 281474976710655, i64 281474976710655>
   %vshl_n = shl <2 x i64> %src2, <i64 48, i64 48>
   %result = or <2 x i64> %and.i, %vshl_n
@@ -369,16 +585,26 @@ define void @testLeftGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw
 }
 
 define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftBad2x64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #10
-; CHECK-NEXT:    shl.2d v1, v1, #48
-; CHECK-NEXT:    movk x8, #1, lsl #48
-; CHECK-NEXT:    dup.2d v2, x8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftBad2x64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov x8, #10 // =0xa
+; CHECK-SD-NEXT:    shl.2d v1, v1, #48
+; CHECK-SD-NEXT:    movk x8, #1, lsl #48
+; CHECK-SD-NEXT:    dup.2d v2, x8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftBad2x64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
+; CHECK-GI-NEXT:    shl.2d v1, v1, #48
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i64> %src1, <i64 281474976710666, i64 281474976710666>
   %vshl_n = shl <2 x i64> %src2, <i64 48, i64 48>
   %result = or <2 x i64> %and.i, %vshl_n
@@ -387,11 +613,20 @@ define void @testLeftBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwi
 }
 
 define void @testRightGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightGood2x64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sri.2d v0, v1, #48
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightGood2x64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sri.2d v0, v1, #48
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightGood2x64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi.2d v2, #0xffffffffffff0000
+; CHECK-GI-NEXT:    ushr.2d v1, v1, #48
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i64> %src1, <i64 18446744073709486080, i64 18446744073709486080>
   %vshl_n = lshr <2 x i64> %src2, <i64 48, i64 48>
   %result = or <2 x i64> %and.i, %vshl_n
@@ -400,16 +635,26 @@ define void @testRightGood2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) noun
 }
 
 define void @testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testRightBad2x64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #10
-; CHECK-NEXT:    ushr.2d v1, v1, #48
-; CHECK-NEXT:    movk x8, #1, lsl #48
-; CHECK-NEXT:    dup.2d v2, x8
-; CHECK-NEXT:    and.16b v0, v0, v2
-; CHECK-NEXT:    orr.16b v0, v0, v1
-; CHECK-NEXT:    str q0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testRightBad2x64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    mov x8, #10 // =0xa
+; CHECK-SD-NEXT:    ushr.2d v1, v1, #48
+; CHECK-SD-NEXT:    movk x8, #1, lsl #48
+; CHECK-SD-NEXT:    dup.2d v2, x8
+; CHECK-SD-NEXT:    and.16b v0, v0, v2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    str q0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testRightBad2x64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI27_0
+; CHECK-GI-NEXT:    ushr.2d v1, v1, #48
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT:    and.16b v0, v0, v2
+; CHECK-GI-NEXT:    orr.16b v0, v0, v1
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
   %and.i = and <2 x i64> %src1, <i64 281474976710666, i64 281474976710666>
   %vshl_n = lshr <2 x i64> %src2, <i64 48, i64 48>
   %result = or <2 x i64> %and.i, %vshl_n
@@ -418,11 +663,19 @@ define void @testRightBad2x64(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) nounw
 }
 
 define void @testLeftShouldNotCreateSLI1x128(<1 x i128> %src1, <1 x i128> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftShouldNotCreateSLI1x128:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    bfi x1, x2, #6, #58
-; CHECK-NEXT:    stp x0, x1, [x4]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: testLeftShouldNotCreateSLI1x128:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    bfi x1, x2, #6, #58
+; CHECK-SD-NEXT:    stp x0, x1, [x4]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: testLeftShouldNotCreateSLI1x128:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov.d v0[0], x0
+; CHECK-GI-NEXT:    bfi x1, x2, #6, #58
+; CHECK-GI-NEXT:    mov.d v0[1], x1
+; CHECK-GI-NEXT:    str q0, [x4]
+; CHECK-GI-NEXT:    ret
   %and.i = and <1 x i128> %src1, <i128 1180591620717411303423>
   %vshl_n = shl <1 x i128> %src2, <i128 70>
   %result = or <1 x i128> %and.i, %vshl_n
diff --git a/llvm/test/CodeGen/AArch64/arm64-vclz.ll b/llvm/test/CodeGen/AArch64/arm64-vclz.ll
index 38c0572e23f890..c65e75c89e8da9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vclz.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vclz.ll
@@ -1,154 +1,254 @@
-; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
-; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_u8
 define <8 x i8> @test_vclz_u8(<8 x i8> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_u8:
-  ; CHECK: clz.8b v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.8b v0, v0
+; CHECK-NEXT:    ret
   %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
   ret <8 x i8> %vclz.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_s8
 define <8 x i8> @test_vclz_s8(<8 x i8> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_s8:
-  ; CHECK: clz.8b v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.8b v0, v0
+; CHECK-NEXT:    ret
   %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) nounwind
   ret <8 x i8> %vclz.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_u16
 define <4 x i16> @test_vclz_u16(<4 x i16> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_u16:
-  ; CHECK: clz.4h v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.4h v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
   ret <4 x i16> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_s16
 define <4 x i16> @test_vclz_s16(<4 x i16> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_s16:
-  ; CHECK: clz.4h v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.4h v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) nounwind
   ret <4 x i16> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_u32
 define <2 x i32> @test_vclz_u32(<2 x i32> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_u32:
-  ; CHECK: clz.2s v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.2s v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
   ret <2 x i32> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_s32
 define <2 x i32> @test_vclz_s32(<2 x i32> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_s32:
-  ; CHECK: clz.2s v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclz_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.2s v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) nounwind
   ret <2 x i32> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_u64
 define <1 x i64> @test_vclz_u64(<1 x i64> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_u64:
+; CHECK-SD-LABEL: test_vclz_u64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushr d1, d0, #1
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #4
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #8
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #16
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #32
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    mvn.8b v0, v0
+; CHECK-SD-NEXT:    cnt.8b v0, v0
+; CHECK-SD-NEXT:    uaddlp.4h v0, v0
+; CHECK-SD-NEXT:    uaddlp.2s v0, v0
+; CHECK-SD-NEXT:    uaddlp.1d v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vclz_u64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    clz x8, x8
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
   %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
   ret <1 x i64> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclz_s64
 define <1 x i64> @test_vclz_s64(<1 x i64> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclz_s64:
+; CHECK-SD-LABEL: test_vclz_s64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushr d1, d0, #1
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #2
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #4
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #8
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #16
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    ushr d1, d0, #32
+; CHECK-SD-NEXT:    orr.8b v0, v0, v1
+; CHECK-SD-NEXT:    mvn.8b v0, v0
+; CHECK-SD-NEXT:    cnt.8b v0, v0
+; CHECK-SD-NEXT:    uaddlp.4h v0, v0
+; CHECK-SD-NEXT:    uaddlp.2s v0, v0
+; CHECK-SD-NEXT:    uaddlp.1d v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vclz_s64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    clz x8, x8
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
   %vclz1.i = tail call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %a, i1 false) nounwind
   ret <1 x i64> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_u8
 define <16 x i8> @test_vclzq_u8(<16 x i8> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_u8:
-  ; CHECK: clz.16b v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_u8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.16b v0, v0
+; CHECK-NEXT:    ret
   %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
   ret <16 x i8> %vclz.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_s8
 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_s8:
-  ; CHECK: clz.16b v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_s8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.16b v0, v0
+; CHECK-NEXT:    ret
   %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) nounwind
   ret <16 x i8> %vclz.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_u16
 define <8 x i16> @test_vclzq_u16(<8 x i16> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_u16:
-  ; CHECK: clz.8h v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_u16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.8h v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
   ret <8 x i16> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_s16
 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_s16:
-  ; CHECK: clz.8h v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_s16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.8h v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) nounwind
   ret <8 x i16> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_u32
 define <4 x i32> @test_vclzq_u32(<4 x i32> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_u32:
-  ; CHECK: clz.4s v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.4s v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
   ret <4 x i32> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_s32
 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_s32:
-  ; CHECK: clz.4s v0, v0
-  ; CHECK-NEXT: ret
+; CHECK-LABEL: test_vclzq_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz.4s v0, v0
+; CHECK-NEXT:    ret
   %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) nounwind
   ret <4 x i32> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_u64
 define <2 x i64> @test_vclzq_u64(<2 x i64> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_u64:
+; CHECK-SD-LABEL: test_vclzq_u64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #1
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #4
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #8
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #16
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #32
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    mvn.16b v0, v0
+; CHECK-SD-NEXT:    cnt.16b v0, v0
+; CHECK-SD-NEXT:    uaddlp.8h v0, v0
+; CHECK-SD-NEXT:    uaddlp.4s v0, v0
+; CHECK-SD-NEXT:    uaddlp.2d v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vclzq_u64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov.d x9, v0[1]
+; CHECK-GI-NEXT:    clz x8, x8
+; CHECK-GI-NEXT:    mov.d v0[0], x8
+; CHECK-GI-NEXT:    clz x8, x9
+; CHECK-GI-NEXT:    mov.d v0[1], x8
+; CHECK-GI-NEXT:    ret
   %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
   ret <2 x i64> %vclz1.i
 }
 
-; FALLBACK-NOT: remark{{.*}}test_vclzq_s64
 define <2 x i64> @test_vclzq_s64(<2 x i64> %a) nounwind readnone ssp {
-  ; CHECK-LABEL: test_vclzq_s64:
+; CHECK-SD-LABEL: test_vclzq_s64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #1
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #2
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #4
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #8
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #16
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    ushr.2d v1, v0, #32
+; CHECK-SD-NEXT:    orr.16b v0, v0, v1
+; CHECK-SD-NEXT:    mvn.16b v0, v0
+; CHECK-SD-NEXT:    cnt.16b v0, v0
+; CHECK-SD-NEXT:    uaddlp.8h v0, v0
+; CHECK-SD-NEXT:    uaddlp.4s v0, v0
+; CHECK-SD-NEXT:    uaddlp.2d v0, v0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_vclzq_s64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    mov.d x9, v0[1]
+; CHECK-GI-NEXT:    clz x8, x8
+; CHECK-GI-NEXT:    mov.d v0[0], x8
+; CHECK-GI-NEXT:    clz x8, x9
+; CHECK-GI-NEXT:    mov.d v0[1], x8
+; CHECK-GI-NEXT:    ret
   %vclz1.i = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) nounwind
   ret <2 x i64> %vclz1.i
 }
 
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
-
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
-
 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
-
 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
-
 declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone
-
 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-
 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
-
 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index d9aaae20afc69e..d4d89a7c9c22e5 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
 ; CHECK-SD-LABEL: concat1:
diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
index 43c6e01911462a..75d55773b3681e 100644
--- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
+++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind {
 ; CHECK-SD-LABEL: and_extract_zext_idx0:
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
index d18af3d5ae9450..7705d8949ca1ed 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll
@@ -2,6 +2,13 @@
 ; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for insert_vscale_8_i16_zero
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for insert_vscale_8_i16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for insert_vscale_16_i8_zero
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for insert_vscale_16_i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_vscale_16_i8
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_vscale_16_i8_zero
+
 define <vscale x 2 x i64> @insert_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %elt) {
 ; CHECK-SD-LABEL: insert_vscale_2_i64_zero:
 ; CHECK-SD:       // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 7056a4d28fed39..51aad4fe25d3b8 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -1,164 +1,308 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-NO16,CHECK-SD-NO16
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-FP16,CHECK-SD-FP16
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-NO16,CHECK-GI-NO16
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-FP16,CHECK-GI-FP16
 
 ; fptoui
 
 define i32 @fcvtzs_f32_i32_7(float %flt) {
-; CHECK-LABEL: fcvtzs_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = fptosi float %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_f32_i32_32(float %flt) {
-; CHECK-LABEL: fcvtzs_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, s0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, s0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 4294967296.0
   %cvt = fptosi float %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_f32_i64_7(float %flt) {
-; CHECK-LABEL: fcvtzs_f32_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f32_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f32_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = fptosi float %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzs_f32_i64_64(float %flt) {
-; CHECK-LABEL: fcvtzs_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, s0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, s0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 18446744073709551616.0
   %cvt = fptosi float %fix to i64
   ret i64 %cvt
 }
 
 define i32 @fcvtzs_f64_i32_7(double %dbl) {
-; CHECK-LABEL: fcvtzs_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = fptosi double %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_f64_i32_32(double %dbl) {
-; CHECK-LABEL: fcvtzs_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 4294967296.0
   %cvt = fptosi double %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_f64_i64_7(double %dbl) {
-; CHECK-LABEL: fcvtzs_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = fptosi double %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzs_f64_i64_64(double %dbl) {
-; CHECK-LABEL: fcvtzs_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, d0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, d0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 18446744073709551616.0
   %cvt = fptosi double %fix to i64
   ret i64 %cvt
 }
 
 define i32 @fcvtzs_f16_i32_7(half %flt) {
-; CHECK-NO16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI8_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 128.0
   %cvt = fptosi half %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_f16_i32_15(half %flt) {
-; CHECK-NO16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI9_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 32768.0
   %cvt = fptosi half %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_f16_i64_7(half %flt) {
-; CHECK-NO16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI10_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 128.0
   %cvt = fptosi half %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzs_f16_i64_15(half %flt) {
-; CHECK-NO16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI11_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 32768.0
   %cvt = fptosi half %fix to i64
   ret i64 %cvt
@@ -167,160 +311,302 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
 ; fptoui
 
 define i32 @fcvtzu_f32_i32_7(float %flt) {
-; CHECK-LABEL: fcvtzu_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = fptoui float %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_f32_i32_32(float %flt) {
-; CHECK-LABEL: fcvtzu_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, s0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, s0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 4294967296.0
   %cvt = fptoui float %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_f32_i64_7(float %flt) {
-; CHECK-LABEL: fcvtzu_f32_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f32_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f32_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = fptoui float %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzu_f32_i64_64(float %flt) {
-; CHECK-LABEL: fcvtzu_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, s0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, s0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 18446744073709551616.0
   %cvt = fptoui float %fix to i64
   ret i64 %cvt
 }
 
 define i32 @fcvtzu_f64_i32_7(double %dbl) {
-; CHECK-LABEL: fcvtzu_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = fptoui double %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_f64_i32_32(double %dbl) {
-; CHECK-LABEL: fcvtzu_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 4294967296.0
   %cvt = fptoui double %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_f64_i64_7(double %dbl) {
-; CHECK-LABEL: fcvtzu_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = fptoui double %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzu_f64_i64_64(double %dbl) {
-; CHECK-LABEL: fcvtzu_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, d0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, d0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 18446744073709551616.0
   %cvt = fptoui double %fix to i64
   ret i64 %cvt
 }
 
 define i32 @fcvtzu_f16_i32_7(half %flt) {
-; CHECK-NO16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI20_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 128.0
   %cvt = fptoui half %fix to i32
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_f16_i32_15(half %flt) {
-; CHECK-NO16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 32768.0
   %cvt = fptoui half %fix to i32
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_f16_i64_7(half %flt) {
-; CHECK-NO16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI22_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 128.0
   %cvt = fptoui half %fix to i64
   ret i64 %cvt
 }
 
 define i64 @fcvtzu_f16_i64_15(half %flt) {
-; CHECK-NO16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI23_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %flt, 32768.0
   %cvt = fptoui half %fix to i64
   ret i64 %cvt
@@ -329,160 +615,302 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
 ; sitofp
 
 define float @scvtf_f32_i32_7(i32 %int) {
-; CHECK-LABEL: scvtf_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf s0, w0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf s0, w0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NEXT:    scvtf s1, w0
+; CHECK-GI-NEXT:    fdiv s0, s1, s0
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i32 %int to float
   %fix = fdiv float %cvt, 128.0
   ret float %fix
 }
 
 define float @scvtf_f32_i32_32(i32 %int) {
-; CHECK-LABEL: scvtf_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf s0, w0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf s0, w0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf s0, w0
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i32 %int to float
   %fix = fdiv float %cvt, 4294967296.0
   ret float %fix
 }
 
 define float @scvtf_f32_i64_7(i64 %long) {
-; CHECK-LABEL: scvtf_f32_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf s0, x0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f32_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf s0, x0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f32_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NEXT:    scvtf s1, x0
+; CHECK-GI-NEXT:    fdiv s0, s1, s0
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i64 %long to float
   %fix = fdiv float %cvt, 128.0
   ret float %fix
 }
 
 define float @scvtf_f32_i64_64(i64 %long) {
-; CHECK-LABEL: scvtf_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf s0, x0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf s0, x0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf s0, x0
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i64 %long to float
   %fix = fdiv float %cvt, 18446744073709551616.0
   ret float %fix
 }
 
 define double @scvtf_f64_i32_7(i32 %int) {
-; CHECK-LABEL: scvtf_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, w0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf d0, w0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, w0
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i32 %int to double
   %fix = fdiv double %cvt, 128.0
   ret double %fix
 }
 
 define double @scvtf_f64_i32_32(i32 %int) {
-; CHECK-LABEL: scvtf_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, w0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf d0, w0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, w0
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i32 %int to double
   %fix = fdiv double %cvt, 4294967296.0
   ret double %fix
 }
 
 define double @scvtf_f64_i64_7(i64 %long) {
-; CHECK-LABEL: scvtf_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, x0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf d0, x0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, x0
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i64 %long to double
   %fix = fdiv double %cvt, 128.0
   ret double %fix
 }
 
 define double @scvtf_f64_i64_64(i64 %long) {
-; CHECK-LABEL: scvtf_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, x0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: scvtf_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    scvtf d0, x0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: scvtf_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, x0
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = sitofp i64 %long to double
   %fix = fdiv double %cvt, 18446744073709551616.0
   ret double %fix
 }
 
 define half @scvtf_f16_i32_7(i32 %int) {
-; CHECK-NO16-LABEL: scvtf_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    scvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: scvtf_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    scvtf h0, w0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: scvtf_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    scvtf s1, w0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #60, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: scvtf_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    scvtf h0, w0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    scvtf s0, w0
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    scvtf h0, w0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI32_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i32 %int to half
   %fix = fdiv half %cvt, 128.0
   ret half %fix
 }
 
 define half @scvtf_f16_i32_15(i32 %int) {
-; CHECK-NO16-LABEL: scvtf_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    scvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: scvtf_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    scvtf h0, w0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: scvtf_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    scvtf s1, w0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #56, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: scvtf_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    scvtf h0, w0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    scvtf s0, w0
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    scvtf h0, w0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI33_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i32 %int to half
   %fix = fdiv half %cvt, 32768.0
   ret half %fix
 }
 
 define half @scvtf_f16_i64_7(i64 %long) {
-; CHECK-NO16-LABEL: scvtf_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    scvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: scvtf_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    scvtf h0, x0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: scvtf_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    scvtf s1, x0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #60, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: scvtf_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    scvtf h0, x0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    scvtf s0, x0
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    scvtf h0, x0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i64 %long to half
   %fix = fdiv half %cvt, 128.0
   ret half %fix
 }
 
 define half @scvtf_f16_i64_15(i64 %long) {
-; CHECK-NO16-LABEL: scvtf_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    scvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: scvtf_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    scvtf h0, x0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: scvtf_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    scvtf s1, x0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #56, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: scvtf_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    scvtf h0, x0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    scvtf s0, x0
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    scvtf h0, x0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i64 %long to half
   %fix = fdiv half %cvt, 32768.0
   ret half %fix
@@ -491,160 +919,302 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ; uitofp
 
 define float @ucvtf_f32_i32_7(i32 %int) {
-; CHECK-LABEL: ucvtf_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf s0, w0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf s0, w0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NEXT:    ucvtf s1, w0
+; CHECK-GI-NEXT:    fdiv s0, s1, s0
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i32 %int to float
   %fix = fdiv float %cvt, 128.0
   ret float %fix
 }
 
 define float @ucvtf_f32_i32_32(i32 %int) {
-; CHECK-LABEL: ucvtf_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf s0, w0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf s0, w0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf s0, w0
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i32 %int to float
   %fix = fdiv float %cvt, 4294967296.0
   ret float %fix
 }
 
 define float @ucvtf_f32_i64_7(i64 %long) {
-; CHECK-LABEL: ucvtf_f32_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf s0, x0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f32_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf s0, x0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f32_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #67, lsl #24
+; CHECK-GI-NEXT:    ucvtf s1, x0
+; CHECK-GI-NEXT:    fdiv s0, s1, s0
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i64 %long to float
   %fix = fdiv float %cvt, 128.0
   ret float %fix
 }
 
 define float @ucvtf_f32_i64_64(i64 %long) {
-; CHECK-LABEL: ucvtf_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf s0, x0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf s0, x0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf s0, x0
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i64 %long to float
   %fix = fdiv float %cvt, 18446744073709551616.0
   ret float %fix
 }
 
 define double @ucvtf_f64_i32_7(i32 %int) {
-; CHECK-LABEL: ucvtf_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf d0, w0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf d0, w0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf d0, w0
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i32 %int to double
   %fix = fdiv double %cvt, 128.0
   ret double %fix
 }
 
 define double @ucvtf_f64_i32_32(i32 %int) {
-; CHECK-LABEL: ucvtf_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf d0, w0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf d0, w0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf d0, w0
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i32 %int to double
   %fix = fdiv double %cvt, 4294967296.0
   ret double %fix
 }
 
 define double @ucvtf_f64_i64_7(i64 %long) {
-; CHECK-LABEL: ucvtf_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf d0, x0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf d0, x0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf d0, x0
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i64 %long to double
   %fix = fdiv double %cvt, 128.0
   ret double %fix
 }
 
 define double @ucvtf_f64_i64_64(i64 %long) {
-; CHECK-LABEL: ucvtf_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf d0, x0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ucvtf_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ucvtf d0, x0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ucvtf_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ucvtf d0, x0
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fdiv d0, d0, d1
+; CHECK-GI-NEXT:    ret
   %cvt = uitofp i64 %long to double
   %fix = fdiv double %cvt, 18446744073709551616.0
   ret double %fix
 }
 
 define half @ucvtf_f16_i32_7(i32 %int) {
-; CHECK-NO16-LABEL: ucvtf_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    ucvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: ucvtf_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ucvtf h0, w0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    ucvtf s1, w0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #60, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ucvtf h0, w0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ucvtf h0, w0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI44_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i32 %int to half
   %fix = fdiv half %cvt, 128.0
   ret half %fix
 }
 
 define half @ucvtf_f16_i32_15(i32 %int) {
-; CHECK-NO16-LABEL: ucvtf_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    ucvtf s1, w0
-; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: ucvtf_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ucvtf h0, w0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: ucvtf_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    ucvtf s1, w0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #56, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ucvtf_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ucvtf h0, w0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    ucvtf s0, w0
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ucvtf h0, w0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI45_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i32 %int to half
   %fix = fdiv half %cvt, 32768.0
   ret half %fix
 }
 
 define half @ucvtf_f16_i64_7(i64 %long) {
-; CHECK-NO16-LABEL: ucvtf_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    ucvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #60, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: ucvtf_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ucvtf h0, x0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    ucvtf s1, x0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #60, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ucvtf h0, x0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    ucvtf s0, x0
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ucvtf h0, x0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI46_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI46_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i64 %long to half
   %fix = fdiv half %cvt, 128.0
   ret half %fix
 }
 
 define half @ucvtf_f16_i64_15(i64 %long) {
-; CHECK-NO16-LABEL: ucvtf_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    ucvtf s1, x0
-; CHECK-NO16-NEXT:    movi v0.2s, #56, lsl #24
-; CHECK-NO16-NEXT:    fcvt h1, s1
-; CHECK-NO16-NEXT:    fcvt s1, h1
-; CHECK-NO16-NEXT:    fmul s0, s1, s0
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: ucvtf_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    ucvtf h0, x0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: ucvtf_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    ucvtf s1, x0
+; CHECK-SD-NO16-NEXT:    movi v0.2s, #56, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt h1, s1
+; CHECK-SD-NO16-NEXT:    fcvt s1, h1
+; CHECK-SD-NO16-NEXT:    fmul s0, s1, s0
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: ucvtf_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    ucvtf h0, x0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    ucvtf s0, x0
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fdiv s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    ucvtf h0, x0
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI47_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i64 %long to half
   %fix = fdiv half %cvt, 32768.0
   ret half %fix
@@ -661,150 +1231,285 @@ declare i32 @llvm.fptosi.sat.i32.f16(half)
 declare i64 @llvm.fptosi.sat.i64.f16(half)
 
 define i32 @fcvtzs_sat_f32_i32_7(float %flt) {
-; CHECK-LABEL: fcvtzs_sat_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_sat_f32_i32_32(float %flt) {
-; CHECK-LABEL: fcvtzs_sat_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, s0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, s0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 4294967296.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_sat_f32_i64_64(float %flt) {
-; CHECK-LABEL: fcvtzs_sat_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, s0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, s0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 18446744073709551616.0
   %cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix)
   ret i64 %cvt
 }
 
 define i32 @fcvtzs_sat_f64_i32_7(double %dbl) {
-; CHECK-LABEL: fcvtzs_sat_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_sat_f64_i32_32(double %dbl) {
-; CHECK-LABEL: fcvtzs_sat_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs w0, d0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs w0, d0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 4294967296.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_sat_f64_i64_7(double %dbl) {
-; CHECK-LABEL: fcvtzs_sat_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix)
   ret i64 %cvt
 }
 
 define i64 @fcvtzs_sat_f64_i64_64(double %dbl) {
-; CHECK-LABEL: fcvtzs_sat_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzs x0, d0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzs_sat_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzs x0, d0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzs_sat_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzs x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 18446744073709551616.0
   %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix)
   ret i64 %cvt
 }
 
 define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI55_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI55_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 128.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs w0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs w0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI56_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI56_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 32768.0
   %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI57_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI57_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 128.0
   %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix)
   ret i64 %cvt
 }
 
 define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzs x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzs x0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzs_sat_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzs_sat_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzs x0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzs x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI58_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI58_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 32768.0
   %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix)
   ret i64 %cvt
@@ -820,151 +1525,290 @@ declare i32 @llvm.fptoui.sat.i32.f16(half)
 declare i64 @llvm.fptoui.sat.i64.f16(half)
 
 define i32 @fcvtzu_sat_f32_i32_7(float %flt) {
-; CHECK-LABEL: fcvtzu_sat_f32_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, s0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, s0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 128.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_sat_f32_i32_32(float %flt) {
-; CHECK-LABEL: fcvtzu_sat_f32_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, s0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f32_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, s0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f32_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1333788672 // =0x4f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 4294967296.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_sat_f32_i64_64(float %flt) {
-; CHECK-LABEL: fcvtzu_sat_f32_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, s0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f32_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, s0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f32_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1602224128 // =0x5f800000
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NEXT:    ret
   %fix = fmul float %flt, 18446744073709551616.0
   %cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix)
   ret i64 %cvt
 }
 
 define i32 @fcvtzu_sat_f64_i32_7(double %dbl) {
-; CHECK-LABEL: fcvtzu_sat_f64_i32_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f64_i32_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_sat_f64_i32_32(double %dbl) {
-; CHECK-LABEL: fcvtzu_sat_f64_i32_32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu w0, d0, #32
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f64_i32_32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu w0, d0, #32
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f64_i32_32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4751297606875873280 // =0x41f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu w0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 4294967296.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_sat_f64_i64_7(double %dbl) {
-; CHECK-LABEL: fcvtzu_sat_f64_i64_7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, d0, #7
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_7:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, d0, #7
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_7:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4638707616191610880 // =0x4060000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 128.0
   %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix)
   ret i64 %cvt
 }
 
 define i64 @fcvtzu_sat_f64_i64_64(double %dbl) {
-; CHECK-LABEL: fcvtzu_sat_f64_i64_64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtzu x0, d0, #64
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fcvtzu_sat_f64_i64_64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fcvtzu x0, d0, #64
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcvtzu_sat_f64_i64_64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #4895412794951729152 // =0x43f0000000000000
+; CHECK-GI-NEXT:    fmov d1, x8
+; CHECK-GI-NEXT:    fmul d0, d0, d1
+; CHECK-GI-NEXT:    fcvtzu x0, d0
+; CHECK-GI-NEXT:    ret
   %fix = fmul double %dbl, 18446744073709551616.0
   %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix)
   ret i64 %cvt
 }
 
 define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI66_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI66_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 128.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix)
   ret i32 %cvt
 }
 
 define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu w0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu w0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i32_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i32_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu w0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu w0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI67_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI67_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 32768.0
   %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix)
   ret i32 %cvt
 }
 
 define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_7:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #67, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0, #7
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_7:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #67, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_7:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #7
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI68_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI68_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 128.0
   %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix)
   ret i64 %cvt
 }
 
 define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
-; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_15:
-; CHECK-NO16:       // %bb.0:
-; CHECK-NO16-NEXT:    movi v1.2s, #71, lsl #24
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fmul s0, s0, s1
-; CHECK-NO16-NEXT:    fcvt h0, s0
-; CHECK-NO16-NEXT:    fcvt s0, h0
-; CHECK-NO16-NEXT:    fcvtzu x0, s0
-; CHECK-NO16-NEXT:    ret
-;
-; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15:
-; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    fcvtzu x0, h0, #15
-; CHECK-FP16-NEXT:    ret
+; CHECK-SD-NO16-LABEL: fcvtzu_sat_f16_i64_15:
+; CHECK-SD-NO16:       // %bb.0:
+; CHECK-SD-NO16-NEXT:    movi v1.2s, #71, lsl #24
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-SD-NO16-NEXT:    fcvt h0, s0
+; CHECK-SD-NO16-NEXT:    fcvt s0, h0
+; CHECK-SD-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-SD-NO16-NEXT:    ret
+;
+; CHECK-SD-FP16-LABEL: fcvtzu_sat_f16_i64_15:
+; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    fcvtzu x0, h0, #15
+; CHECK-SD-FP16-NEXT:    ret
+;
+; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
+; CHECK-GI-NO16:       // %bb.0:
+; CHECK-GI-NO16-NEXT:    mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fmov s1, w8
+; CHECK-GI-NO16-NEXT:    fcvt s1, h1
+; CHECK-GI-NO16-NEXT:    fmul s0, s0, s1
+; CHECK-GI-NO16-NEXT:    fcvt h0, s0
+; CHECK-GI-NO16-NEXT:    fcvt s0, h0
+; CHECK-GI-NO16-NEXT:    fcvtzu x0, s0
+; CHECK-GI-NO16-NEXT:    ret
+;
+; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15:
+; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI69_0
+; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI69_0]
+; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
+; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
+; CHECK-GI-FP16-NEXT:    ret
   %fix = fmul half %dbl, 32768.0
   %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix)
   ret i64 %cvt
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
+; CHECK-FP16: {{.*}}
+; CHECK-NO16: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index bbfec8c7c33617..4ab5db450a7f32 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
 ; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
index 4cce06dce44c9b..a80d51bac99297 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -1,11 +1,84 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -mattr=+fullfp16 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; Check that constrained fp intrinsics are correctly lowered.
 
+; CHECK-GI:       warning: Instruction selection used fallback path for add_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for frem_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i32_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i32_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i64_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i64_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f16_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f16_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f16_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f16_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f16_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f16_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for powi_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sin_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cos_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tan_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for asin_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for acos_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan2_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sinh_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cosh_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tanh_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pow_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log10_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log2_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp2_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lrint_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llrint_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lround_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llround_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_olt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ole_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ogt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oge_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oeq_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_one_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ult_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ule_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ugt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_uge_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ueq_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_une_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_olt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ole_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ogt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oge_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oeq_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_one_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ult_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ule_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ugt_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_uge_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ueq_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_une_f16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptrunc_f16_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fpext_f32_f16
 
 ; Half-precision intrinsics
 
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index 6147afba4e603a..83e60c10897624 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -1,9 +1,86 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64 %s -disable-strictnode-mutation -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 -disable-strictnode-mutation %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; Check that constrained fp vector intrinsics are correctly lowered.
 
+; CHECK-GI:       warning: Instruction selection used fallback path for add_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v4i32_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v4i32_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v4i64_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v4i64_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v4f32_v4i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v4f32_v4i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v4f32_v4i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v4f32_v4i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_v4f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for add_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v2i32_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v2i32_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v2i64_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v2i64_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v2f64_v2i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v2f64_v2i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v2f64_v2i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v2f64_v2i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for add_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v1i32_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v1i32_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_v1i64_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_v1i64_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v1f64_v1i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v1f64_v1i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_v1f64_v1i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_v1f64_v1i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_v1f61
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_v1f61
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptrunc_v2f32_v2f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fpext_v2f64_v2f32
 
 ; Single-precision intrinsics
 
@@ -882,3 +959,7 @@ declare <1 x i1> @llvm.experimental.constrained.fcmps.v1f64(<1 x double>, <1 x d
 
 declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index fd3a0c3207606c..f2a14a9b73fa16 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -1,543 +1,1037 @@
-; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
-; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel=true -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; Check that constrained fp intrinsics are correctly lowered.
 
+; CHECK-GI:       warning: Instruction selection used fallback path for add_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for frem_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i32_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i32_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i64_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i64_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f32_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f32_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f32_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f32_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f32_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f32_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for powi_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sin_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cos_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tan_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for asin_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for acos_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan2_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sinh_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cosh_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tanh_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pow_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log10_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log2_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp2_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lrint_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llrint_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maximum_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minimum_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lround_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llround_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_olt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ole_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ogt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oge_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oeq_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_one_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ult_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ule_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ugt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_uge_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ueq_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_une_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_olt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ole_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ogt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oge_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oeq_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_one_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ult_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ule_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ugt_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_uge_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ueq_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_une_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for add_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for frem_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i32_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i32_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i64_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i64_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f64_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f64_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f64_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f64_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f64_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f64_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for powi_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sin_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cos_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tan_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for asin_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for acos_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan2_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sinh_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cosh_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tanh_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pow_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log10_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log2_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp2_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lrint_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llrint_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maximum_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minimum_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lround_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llround_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for roundeven_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_olt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ole_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ogt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oge_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oeq_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_one_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ult_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ule_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ugt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_uge_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ueq_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_une_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_olt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ole_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ogt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oge_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oeq_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_one_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ult_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ule_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ugt_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_uge_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ueq_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_une_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for add_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sub_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for mul_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for div_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for frem_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fma_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i32_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i32_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptosi_i64_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptoui_i64_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f128_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f128_i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f128_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f128_i64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sitofp_f128_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uitofp_f128_i128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sqrt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for powi_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sin_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cos_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tan_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for asin_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for acos_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan2_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sinh_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cosh_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tanh_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pow_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log10_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log2_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp2_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for rint_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for nearbyint_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lrint_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llrint_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for maxnum_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for minnum_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for ceil_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for floor_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for lround_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for llround_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for round_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for trunc_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_olt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ole_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ogt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oge_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_oeq_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_one_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ult_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ule_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ugt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_uge_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_ueq_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmp_une_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_olt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ole_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ogt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oge_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_oeq_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_one_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ult_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ule_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ugt_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_uge_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_ueq_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fcmps_une_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptrunc_f32_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptrunc_f32_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fptrunc_f64_f128
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fpext_f64_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fpext_f128_f32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fpext_f128_f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sin_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cos_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tan_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for asin_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for acos_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for atan2_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sinh_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for cosh_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for tanh_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pow_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log2_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for log10_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp_v1f64
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for exp2_v1f64
+
 
 ; Single-precision intrinsics
 
-; CHECK-LABEL: add_f32:
-; CHECK: fadd s0, s0, s1
 define float @add_f32(float %x, float %y) #0 {
+; CHECK-LABEL: add_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sub_f32:
-; CHECK: fsub s0, s0, s1
 define float @sub_f32(float %x, float %y) #0 {
+; CHECK-LABEL: sub_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: mul_f32:
-; CHECK: fmul s0, s0, s1
 define float @mul_f32(float %x, float %y) #0 {
+; CHECK-LABEL: mul_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: div_f32:
-; CHECK: fdiv s0, s0, s1
 define float @div_f32(float %x, float %y) #0 {
+; CHECK-LABEL: div_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: frem_f32:
-; CHECK: bl fmodf
 define float @frem_f32(float %x, float %y) #0 {
+; CHECK-LABEL: frem_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fmodf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fma_f32:
-; CHECK: fmadd s0, s0, s1, s2
 define float @fma_f32(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: fma_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmadd s0, s0, s1, s2
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fptosi_i32_f32:
-; CHECK: fcvtzs w0, s0
 define i32 @fptosi_i32_f32(float %x) #0 {
+; CHECK-LABEL: fptosi_i32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptoui_i32_f32:
-; CHECK: fcvtzu w0, s0
 define i32 @fptoui_i32_f32(float %x) #0 {
+; CHECK-LABEL: fptoui_i32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu w0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptosi_i64_f32:
-; CHECK: fcvtzs x0, s0
 define i64 @fptosi_i64_f32(float %x) #0 {
+; CHECK-LABEL: fptosi_i64_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs x0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: fptoui_i64_f32:
-; CHECK: fcvtzu x0, s0
 define i64 @fptoui_i64_f32(float %x) #0 {
+; CHECK-LABEL: fptoui_i64_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu x0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: sitofp_f32_i32:
-; CHECK: scvtf s0, w0
 define float @sitofp_f32_i32(i32 %x) #0 {
+; CHECK-LABEL: sitofp_f32_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf s0, w0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: uitofp_f32_i32:
-; CHECK: ucvtf s0, w0
 define float @uitofp_f32_i32(i32 %x) #0 {
+; CHECK-LABEL: uitofp_f32_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf s0, w0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sitofp_f32_i64:
-; CHECK: scvtf s0, x0
 define float @sitofp_f32_i64(i64 %x) #0 {
+; CHECK-LABEL: sitofp_f32_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf s0, x0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: uitofp_f32_i64:
-; CHECK: ucvtf s0, x0
 define float @uitofp_f32_i64(i64 %x) #0 {
+; CHECK-LABEL: uitofp_f32_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf s0, x0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sitofp_f32_i128:
-; CHECK: bl __floattisf
 define float @sitofp_f32_i128(i128 %x) #0 {
+; CHECK-LABEL: sitofp_f32_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floattisf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: uitofp_f32_i128:
-; CHECK: bl __floatuntisf
 define float @uitofp_f32_i128(i128 %x) #0 {
+; CHECK-LABEL: uitofp_f32_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatuntisf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sqrt_f32:
-; CHECK: fsqrt s0, s0
 define float @sqrt_f32(float %x) #0 {
+; CHECK-LABEL: sqrt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: powi_f32:
-; CHECK: bl __powisf2
 define float @powi_f32(float %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __powisf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.powi.f32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sin_f32:
-; CHECK: bl sinf
 define float @sin_f32(float %x) #0 {
+; CHECK-LABEL: sin_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: cos_f32:
-; CHECK: bl cosf
 define float @cos_f32(float %x) #0 {
+; CHECK-LABEL: cos_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cosf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: tan_f32:
-; CHECK: bl tanf
 define float @tan_f32(float %x) #0 {
+; CHECK-LABEL: tan_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.tan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: asin_f32:
-; CHECK: bl asinf
 define float @asin_f32(float %x) #0 {
+; CHECK-LABEL: asin_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl asinf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: acos_f32:
-; CHECK: bl acosf
 define float @acos_f32(float %x) #0 {
+; CHECK-LABEL: acos_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl acosf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: atan_f32:
-; CHECK: bl atanf
 define float @atan_f32(float %x) #0 {
+; CHECK-LABEL: atan_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atanf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: atan2_f32:
-; CHECK: bl atan2f
 define float @atan2_f32(float %x, float %y) #0 {
+; CHECK-LABEL: atan2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan2f
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: sinh_f32:
-; CHECK: bl sinhf
 define float @sinh_f32(float %x) #0 {
+; CHECK-LABEL: sinh_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinhf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: cosh_f32:
-; CHECK: bl coshf
 define float @cosh_f32(float %x) #0 {
+; CHECK-LABEL: cosh_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl coshf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: tanh_f32:
-; CHECK: bl tanhf
 define float @tanh_f32(float %x) #0 {
+; CHECK-LABEL: tanh_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanhf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: pow_f32:
-; CHECK: bl powf
 define float @pow_f32(float %x, float %y) #0 {
+; CHECK-LABEL: pow_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl powf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: log_f32:
-; CHECK: bl logf
 define float @log_f32(float %x) #0 {
+; CHECK-LABEL: log_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl logf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: log10_f32:
-; CHECK: bl log10f
 define float @log10_f32(float %x) #0 {
+; CHECK-LABEL: log10_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log10f
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: log2_f32:
-; CHECK: bl log2f
 define float @log2_f32(float %x) #0 {
+; CHECK-LABEL: log2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log2f
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.log2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: exp_f32:
-; CHECK: bl expf
 define float @exp_f32(float %x) #0 {
+; CHECK-LABEL: exp_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl expf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: exp2_f32:
-; CHECK: bl exp2f
 define float @exp2_f32(float %x) #0 {
+; CHECK-LABEL: exp2_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp2f
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.exp2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: rint_f32:
-; CHECK: frintx s0, s0
 define float @rint_f32(float %x) #0 {
+; CHECK-LABEL: rint_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.rint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: nearbyint_f32:
-; CHECK: frinti s0, s0
 define float @nearbyint_f32(float %x) #0 {
+; CHECK-LABEL: nearbyint_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinti s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.nearbyint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: lrint_f32:
-; CHECK: frintx [[REG:s[0-9]+]], s0
-; CHECK: fcvtzs w0, [[REG]]
 define i32 @lrint_f32(float %x) #0 {
+; CHECK-LABEL: lrint_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs w0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llrint_f32:
-; CHECK: frintx [[REG:s[0-9]+]], s0
-; CHECK: fcvtzs x0, [[REG]]
 define i64 @llrint_f32(float %x) #0 {
+; CHECK-LABEL: llrint_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx s0, s0
+; CHECK-NEXT:    fcvtzs x0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: maxnum_f32:
-; CHECK: fmaxnm s0, s0, s1
 define float @maxnum_f32(float %x, float %y) #0 {
+; CHECK-LABEL: maxnum_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.maxnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: minnum_f32:
-; CHECK: fminnm s0, s0, s1
 define float @minnum_f32(float %x, float %y) #0 {
+; CHECK-LABEL: minnum_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.minnum.f32(float %x, float %y, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: maximum_f32:
-; CHECK: fmax s0, s0, s1
 define float @maximum_f32(float %x, float %y) #0 {
+; CHECK-LABEL: maximum_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.maximum.f32(float %x, float %y, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: minimum_f32:
-; CHECK: fmin s0, s0, s1
 define float @minimum_f32(float %x, float %y) #0 {
+; CHECK-LABEL: minimum_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin s0, s0, s1
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.minimum.f32(float %x, float %y, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: ceil_f32:
-; CHECK: frintp s0, s0
 define float @ceil_f32(float %x) #0 {
+; CHECK-LABEL: ceil_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: floor_f32:
-; CHECK: frintm s0, s0
 define float @floor_f32(float %x) #0 {
+; CHECK-LABEL: floor_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: lround_f32:
-; CHECK: fcvtas w0, s0
 define i32 @lround_f32(float %x) #0 {
+; CHECK-LABEL: lround_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas w0, s0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llround_f32:
-; CHECK: fcvtas x0, s0
 define i64 @llround_f32(float %x) #0 {
+; CHECK-LABEL: llround_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas x0, s0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: round_f32:
-; CHECK: frinta s0, s0
 define float @round_f32(float %x) #0 {
+; CHECK-LABEL: round_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.round.f32(float %x, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: roundeven_f32:
-; CHECK: frintn s0, s0
 define float @roundeven_f32(float %x) #0 {
+; CHECK-LABEL: roundeven_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: trunc_f32:
-; CHECK: frintz s0, s0
 define float @trunc_f32(float %x) #0 {
+; CHECK-LABEL: trunc_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.trunc.f32(float %x, metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fcmp_olt_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_olt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_olt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ole_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ole_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ole_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ogt_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ogt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ogt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oge_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_oge_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_oge_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oeq_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_oeq_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_oeq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_one_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_one_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_one_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, mi
+; CHECK-NEXT:    csinc w0, w8, wzr, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ult_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ult_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ult_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ule_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ule_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ule_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ugt_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ugt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ugt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_uge_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_uge_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_uge_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, pl
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ueq_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_ueq_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_ueq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    csinc w0, w8, wzr, vc
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_une_f32:
-; CHECK: fcmp s0, s1
 define i32 @fcmp_une_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmp_une_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_olt_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_olt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_olt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ole_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ole_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ole_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ogt_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ogt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ogt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oge_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_oge_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_oge_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oeq_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_oeq_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_oeq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_one_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_one_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_one_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w8, mi
+; CHECK-NEXT:    csinc w0, w8, wzr, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ult_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ult_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ult_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ule_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ule_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ule_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ugt_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ugt_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ugt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_uge_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_uge_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_uge_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, pl
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ueq_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_ueq_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_ueq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    csinc w0, w8, wzr, vc
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_une_f32:
-; CHECK: fcmpe s0, s1
 define i32 @fcmps_une_f32(float %a, float %b) #0 {
+; CHECK-LABEL: fcmps_une_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe s0, s1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -546,538 +1040,792 @@ define i32 @fcmps_une_f32(float %a, float %b) #0 {
 
 ; Double-precision intrinsics
 
-; CHECK-LABEL: add_f64:
-; CHECK: fadd d0, d0, d1
 define double @add_f64(double %x, double %y) #0 {
+; CHECK-LABEL: add_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fadd d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sub_f64:
-; CHECK: fsub d0, d0, d1
 define double @sub_f64(double %x, double %y) #0 {
+; CHECK-LABEL: sub_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsub d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: mul_f64:
-; CHECK: fmul d0, d0, d1
 define double @mul_f64(double %x, double %y) #0 {
+; CHECK-LABEL: mul_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: div_f64:
-; CHECK: fdiv d0, d0, d1
 define double @div_f64(double %x, double %y) #0 {
+; CHECK-LABEL: div_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fdiv.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: frem_f64:
-; CHECK: bl fmod
 define double @frem_f64(double %x, double %y) #0 {
+; CHECK-LABEL: frem_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fmod
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.frem.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: fma_f64:
-; CHECK: fmadd d0, d0, d1, d2
 define double @fma_f64(double %x, double %y, double %z) #0 {
+; CHECK-LABEL: fma_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmadd d0, d0, d1, d2
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: fptosi_i32_f64:
-; CHECK: fcvtzs w0, d0
 define i32 @fptosi_i32_f64(double %x) #0 {
+; CHECK-LABEL: fptosi_i32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs w0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptoui_i32_f64:
-; CHECK: fcvtzu w0, d0
 define i32 @fptoui_i32_f64(double %x) #0 {
+; CHECK-LABEL: fptoui_i32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu w0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptosi_i64_f64:
-; CHECK: fcvtzs x0, d0
 define i64 @fptosi_i64_f64(double %x) #0 {
+; CHECK-LABEL: fptosi_i64_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzs x0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: fptoui_i64_f64:
-; CHECK: fcvtzu x0, d0
 define i64 @fptoui_i64_f64(double %x) #0 {
+; CHECK-LABEL: fptoui_i64_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtzu x0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: sitofp_f64_i32:
-; CHECK: scvtf d0, w0
 define double @sitofp_f64_i32(i32 %x) #0 {
+; CHECK-LABEL: sitofp_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, w0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: uitofp_f64_i32:
-; CHECK: ucvtf d0, w0
 define double @uitofp_f64_i32(i32 %x) #0 {
+; CHECK-LABEL: uitofp_f64_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf d0, w0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sitofp_f64_i64:
-; CHECK: scvtf d0, x0
 define double @sitofp_f64_i64(i64 %x) #0 {
+; CHECK-LABEL: sitofp_f64_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    scvtf d0, x0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: uitofp_f64_i64:
-; CHECK: ucvtf d0, x0
 define double @uitofp_f64_i64(i64 %x) #0 {
+; CHECK-LABEL: uitofp_f64_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ucvtf d0, x0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sitofp_f64_i128:
-; CHECK: bl __floattidf
 define double @sitofp_f64_i128(i128 %x) #0 {
+; CHECK-LABEL: sitofp_f64_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floattidf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: uitofp_f64_i128:
-; CHECK: bl __floatuntidf
 define double @uitofp_f64_i128(i128 %x) #0 {
+; CHECK-LABEL: uitofp_f64_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatuntidf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sqrt_f64:
-; CHECK: fsqrt d0, d0
 define double @sqrt_f64(double %x) #0 {
+; CHECK-LABEL: sqrt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sqrt.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: powi_f64:
-; CHECK: bl __powidf2
 define double @powi_f64(double %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __powidf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.powi.f64(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sin_f64:
-; CHECK: bl sin
 define double @sin_f64(double %x) #0 {
+; CHECK-LABEL: sin_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sin
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: cos_f64:
-; CHECK: bl cos
 define double @cos_f64(double %x) #0 {
+; CHECK-LABEL: cos_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.cos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: tan_f64:
-; CHECK: bl tan
 define double @tan_f64(double %x) #0 {
+; CHECK-LABEL: tan_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tan
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.tan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: asin_f64:
-; CHECK: bl asin
 define double @asin_f64(double %x) #0 {
+; CHECK-LABEL: asin_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl asin
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: acos_f64:
-; CHECK: bl acos
 define double @acos_f64(double %x) #0 {
+; CHECK-LABEL: acos_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl acos
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: atan_f64:
-; CHECK: bl atan
 define double @atan_f64(double %x) #0 {
+; CHECK-LABEL: atan_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: atan2_f64:
-; CHECK: bl atan2
 define double @atan2_f64(double %x, double %y) #0 {
+; CHECK-LABEL: atan2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.atan2.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: sinh_f64:
-; CHECK: bl sinh
 define double @sinh_f64(double %x) #0 {
+; CHECK-LABEL: sinh_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: cosh_f64:
-; CHECK: bl cosh
 define double @cosh_f64(double %x) #0 {
+; CHECK-LABEL: cosh_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cosh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: tanh_f64:
-; CHECK: bl tanh
 define double @tanh_f64(double %x) #0 {
+; CHECK-LABEL: tanh_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: pow_f64:
-; CHECK: bl pow
 define double @pow_f64(double %x, double %y) #0 {
+; CHECK-LABEL: pow_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: log_f64:
-; CHECK: bl log
 define double @log_f64(double %x) #0 {
+; CHECK-LABEL: log_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.log.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: log10_f64:
-; CHECK: bl log10
 define double @log10_f64(double %x) #0 {
+; CHECK-LABEL: log10_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log10
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.log10.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: log2_f64:
-; CHECK: bl log2
 define double @log2_f64(double %x) #0 {
+; CHECK-LABEL: log2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.log2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: exp_f64:
-; CHECK: bl exp
 define double @exp_f64(double %x) #0 {
+; CHECK-LABEL: exp_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: exp2_f64:
-; CHECK: bl exp2
 define double @exp2_f64(double %x) #0 {
+; CHECK-LABEL: exp2_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.exp2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: rint_f64:
-; CHECK: frintx d0, d0
 define double @rint_f64(double %x) #0 {
+; CHECK-LABEL: rint_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.rint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: nearbyint_f64:
-; CHECK: frinti d0, d0
 define double @nearbyint_f64(double %x) #0 {
+; CHECK-LABEL: nearbyint_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinti d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: lrint_f64:
-; CHECK: frintx [[REG:d[0-9]+]], d0
-; CHECK: fcvtzs w0, [[REG]]
 define i32 @lrint_f64(double %x) #0 {
+; CHECK-LABEL: lrint_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs w0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llrint_f64:
-; CHECK: frintx [[REG:d[0-9]+]], d0
-; CHECK: fcvtzs x0, [[REG]]
 define i64 @llrint_f64(double %x) #0 {
+; CHECK-LABEL: llrint_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintx d0, d0
+; CHECK-NEXT:    fcvtzs x0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: maxnum_f64:
-; CHECK: fmaxnm d0, d0, d1
 define double @maxnum_f64(double %x, double %y) #0 {
+; CHECK-LABEL: maxnum_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnm d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.maxnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: minnum_f64:
-; CHECK: fminnm d0, d0, d1
 define double @minnum_f64(double %x, double %y) #0 {
+; CHECK-LABEL: minnum_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnm d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.minnum.f64(double %x, double %y, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: maximum_f64:
-; CHECK: fmax d0, d0, d1
 define double @maximum_f64(double %x, double %y) #0 {
+; CHECK-LABEL: maximum_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmax d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.maximum.f64(double %x, double %y, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: minimum_f64:
-; CHECK: fmin d0, d0, d1
 define double @minimum_f64(double %x, double %y) #0 {
+; CHECK-LABEL: minimum_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmin d0, d0, d1
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.minimum.f64(double %x, double %y, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: ceil_f64:
-; CHECK: frintp d0, d0
 define double @ceil_f64(double %x) #0 {
+; CHECK-LABEL: ceil_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintp d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.ceil.f64(double %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: floor_f64:
-; CHECK: frintm d0, d0
 define double @floor_f64(double %x) #0 {
+; CHECK-LABEL: floor_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintm d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.floor.f64(double %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: lround_f64:
-; CHECK: fcvtas w0, d0
 define i32 @lround_f64(double %x) #0 {
+; CHECK-LABEL: lround_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas w0, d0
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llround_f64:
-; CHECK: fcvtas x0, d0
 define i64 @llround_f64(double %x) #0 {
+; CHECK-LABEL: llround_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvtas x0, d0
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: round_f64:
-; CHECK: frinta d0, d0
 define double @round_f64(double %x) #0 {
+; CHECK-LABEL: round_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frinta d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.round.f64(double %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: roundeven_f64:
-; CHECK: frintn d0, d0
 define double @roundeven_f64(double %x) #0 {
+; CHECK-LABEL: roundeven_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintn d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: trunc_f64:
-; CHECK: frintz d0, d0
 define double @trunc_f64(double %x) #0 {
+; CHECK-LABEL: trunc_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    frintz d0, d0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.trunc.f64(double %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: fcmp_olt_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_olt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_olt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ole_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ole_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ole_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ogt_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ogt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ogt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oge_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_oge_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_oge_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oeq_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_oeq_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_oeq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_one_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_one_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_one_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w8, mi
+; CHECK-NEXT:    csinc w0, w8, wzr, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ult_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ult_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ult_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ule_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ule_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ule_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ugt_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ugt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ugt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_uge_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_uge_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_uge_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, pl
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ueq_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_ueq_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_ueq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    csinc w0, w8, wzr, vc
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_une_f64:
-; CHECK: fcmp d0, d1
 define i32 @fcmp_une_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmp_une_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmp d0, d1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_olt_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_olt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_olt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, mi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ole_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ole_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ole_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, ls
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ogt_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ogt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ogt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oge_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_oge_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_oge_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oeq_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_oeq_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_oeq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_one_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_one_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_one_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w8, mi
+; CHECK-NEXT:    csinc w0, w8, wzr, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ult_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ult_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ult_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ule_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ule_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ule_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ugt_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ugt_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ugt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_uge_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_uge_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_uge_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, pl
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ueq_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_ueq_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_ueq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    csinc w0, w8, wzr, vc
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_une_f64:
-; CHECK: fcmpe d0, d1
 define i32 @fcmps_une_f64(double %a, double %b) #0 {
+; CHECK-LABEL: fcmps_une_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcmpe d0, d1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -1086,515 +1834,1015 @@ define i32 @fcmps_une_f64(double %a, double %b) #0 {
 
 ; Long-double-precision intrinsics
 
-; CHECK-LABEL: add_f128:
-; CHECK: bl __addtf3
 define fp128 @add_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: add_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __addtf3
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sub_f128:
-; CHECK: bl __subtf3
 define fp128 @sub_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: sub_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __subtf3
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: mul_f128:
-; CHECK: bl __multf3
 define fp128 @mul_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: mul_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __multf3
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: div_f128:
-; CHECK: bl __divtf3
 define fp128 @div_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: div_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __divtf3
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: frem_f128:
-; CHECK: bl fmodl
 define fp128 @frem_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: frem_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fmodl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: fma_f128:
-; CHECK: fmal
 define fp128 @fma_f128(fp128 %x, fp128 %y, fp128 %z) #0 {
+; CHECK-LABEL: fma_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fmal
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: fptosi_i32_f128:
-; CHECK: bl __fixtfsi
 define i32 @fptosi_i32_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptosi_i32_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __fixtfsi
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptoui_i32_f128:
-; CHECK: bl __fixunstfsi
 define i32 @fptoui_i32_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptoui_i32_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __fixunstfsi
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: fptosi_i64_f128:
-; CHECK: bl __fixtfdi
 define i64 @fptosi_i64_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptosi_i64_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __fixtfdi
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: fptoui_i64_f128:
-; CHECK: bl __fixunstfdi
 define i64 @fptoui_i64_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptoui_i64_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __fixunstfdi
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: sitofp_f128_i32:
-; CHECK: bl __floatsitf
 define fp128 @sitofp_f128_i32(i32 %x) #0 {
+; CHECK-LABEL: sitofp_f128_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatsitf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: uitofp_f128_i32:
-; CHECK: bl __floatunsitf
 define fp128 @uitofp_f128_i32(i32 %x) #0 {
+; CHECK-LABEL: uitofp_f128_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatunsitf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sitofp_f128_i64:
-; CHECK: bl __floatditf
 define fp128 @sitofp_f128_i64(i64 %x) #0 {
+; CHECK-LABEL: sitofp_f128_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatditf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: uitofp_f128_i64:
-; CHECK: bl __floatunditf
 define fp128 @uitofp_f128_i64(i64 %x) #0 {
+; CHECK-LABEL: uitofp_f128_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatunditf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sitofp_f128_i128:
-; CHECK: bl __floattitf
 define fp128 @sitofp_f128_i128(i128 %x) #0 {
+; CHECK-LABEL: sitofp_f128_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floattitf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: uitofp_f128_i128:
-; CHECK: bl __floatuntitf
 define fp128 @uitofp_f128_i128(i128 %x) #0 {
+; CHECK-LABEL: uitofp_f128_i128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __floatuntitf
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.uitofp.f128.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sqrt_f128:
-; CHECK: bl sqrtl
 define fp128 @sqrt_f128(fp128 %x) #0 {
+; CHECK-LABEL: sqrt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sqrtl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: powi_f128:
-; CHECK: bl __powitf2
 define fp128 @powi_f128(fp128 %x, i32 %y) #0 {
+; CHECK-LABEL: powi_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __powitf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sin_f128:
-; CHECK: bl sinl
 define fp128 @sin_f128(fp128 %x) #0 {
+; CHECK-LABEL: sin_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: cos_f128:
-; CHECK: bl cosl
 define fp128 @cos_f128(fp128 %x) #0 {
+; CHECK-LABEL: cos_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cosl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: tan_f128:
-; CHECK: bl tanl
 define fp128 @tan_f128(fp128 %x) #0 {
+; CHECK-LABEL: tan_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.tan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: asin_f128:
-; CHECK: bl asinl
 define fp128 @asin_f128(fp128 %x) #0 {
+; CHECK-LABEL: asin_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl asinl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: acos_f128:
-; CHECK: bl acosl
 define fp128 @acos_f128(fp128 %x) #0 {
+; CHECK-LABEL: acos_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl acosl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: atan_f128:
-; CHECK: bl atanl
 define fp128 @atan_f128(fp128 %x) #0 {
+; CHECK-LABEL: atan_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atanl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: atan2_f128:
-; CHECK: bl atan2l
 define fp128 @atan2_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: atan2_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan2l
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sinh_f128:
-; CHECK: bl sinhl
 define fp128 @sinh_f128(fp128 %x) #0 {
+; CHECK-LABEL: sinh_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinhl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: cosh_f128:
-; CHECK: bl coshl
 define fp128 @cosh_f128(fp128 %x) #0 {
+; CHECK-LABEL: cosh_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl coshl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: tanh_f128:
-; CHECK: bl tanhl
 define fp128 @tanh_f128(fp128 %x) #0 {
+; CHECK-LABEL: tanh_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanhl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: pow_f128:
-; CHECK: bl powl
 define fp128 @pow_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: pow_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl powl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: log_f128:
-; CHECK: bl logl
 define fp128 @log_f128(fp128 %x) #0 {
+; CHECK-LABEL: log_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl logl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: log10_f128:
-; CHECK: bl log10l
 define fp128 @log10_f128(fp128 %x) #0 {
+; CHECK-LABEL: log10_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log10l
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: log2_f128:
-; CHECK: bl log2l
 define fp128 @log2_f128(fp128 %x) #0 {
+; CHECK-LABEL: log2_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log2l
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: exp_f128:
-; CHECK: bl expl
 define fp128 @exp_f128(fp128 %x) #0 {
+; CHECK-LABEL: exp_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl expl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: exp2_f128:
-; CHECK: bl exp2l
 define fp128 @exp2_f128(fp128 %x) #0 {
+; CHECK-LABEL: exp2_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp2l
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: rint_f128:
-; CHECK: bl rintl
 define fp128 @rint_f128(fp128 %x) #0 {
+; CHECK-LABEL: rint_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl rintl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: nearbyint_f128:
-; CHECK: bl nearbyintl
 define fp128 @nearbyint_f128(fp128 %x) #0 {
+; CHECK-LABEL: nearbyint_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl nearbyintl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: lrint_f128:
-; CHECK: bl lrintl
 define i32 @lrint_f128(fp128 %x) #0 {
+; CHECK-LABEL: lrint_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl lrintl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llrint_f128:
-; CHECK: bl llrintl
 define i64 @llrint_f128(fp128 %x) #0 {
+; CHECK-LABEL: llrint_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: maxnum_f128:
-; CHECK: bl fmaxl
 define fp128 @maxnum_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: maxnum_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fmaxl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: minnum_f128:
-; CHECK: bl fminl
 define fp128 @minnum_f128(fp128 %x, fp128 %y) #0 {
+; CHECK-LABEL: minnum_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl fminl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: ceil_f128:
-; CHECK: bl ceill
 define fp128 @ceil_f128(fp128 %x) #0 {
+; CHECK-LABEL: ceil_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl ceill
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: floor_f128:
-; CHECK: bl floorl
 define fp128 @floor_f128(fp128 %x) #0 {
+; CHECK-LABEL: floor_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl floorl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: lround_f128:
-; CHECK: bl lroundl
 define i32 @lround_f128(fp128 %x) #0 {
+; CHECK-LABEL: lround_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl lroundl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
-; CHECK-LABEL: llround_f128:
-; CHECK: bl llroundl
 define i64 @llround_f128(fp128 %x) #0 {
+; CHECK-LABEL: llround_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llroundl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
-; CHECK-LABEL: round_f128:
-; CHECK: bl roundl
 define fp128 @round_f128(fp128 %x) #0 {
+; CHECK-LABEL: round_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl roundl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: trunc_f128:
-; CHECK: bl truncl
 define fp128 @trunc_f128(fp128 %x) #0 {
+; CHECK-LABEL: trunc_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl truncl
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: fcmp_olt_f128:
-; CHECK: bl __lttf2
 define i32 @fcmp_olt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_olt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __lttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ole_f128:
-; CHECK: bl __letf2
 define i32 @fcmp_ole_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ole_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __letf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ogt_f128:
-; CHECK: bl __gttf2
 define i32 @fcmp_ogt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ogt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oge_f128:
-; CHECK: bl __getf2
 define i32 @fcmp_oge_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_oge_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __getf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_oeq_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmp_oeq_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_oeq_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_one_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmp_one_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_one_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    bl __unordtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    ccmp w19, #0, #4, eq
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ult_f128:
-; CHECK: bl __getf2
 define i32 @fcmp_ult_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ult_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __getf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ule_f128:
-; CHECK: bl __gttf2
 define i32 @fcmp_ule_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ule_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ugt_f128:
-; CHECK: bl __letf2
 define i32 @fcmp_ugt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ugt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __letf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_uge_f128:
-; CHECK: bl __lttf2
 define i32 @fcmp_uge_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_uge_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __lttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_ueq_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmp_ueq_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_ueq_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    bl __unordtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    ccmp w19, #0, #4, eq
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmp_une_f128:
-; CHECK: bl __netf2
 define i32 @fcmp_une_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmp_une_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __netf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_olt_f128:
-; CHECK: bl __lttf2
 define i32 @fcmps_olt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_olt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __lttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"olt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ole_f128:
-; CHECK: bl __letf2
 define i32 @fcmps_ole_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ole_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __letf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ole", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ogt_f128:
-; CHECK: bl __gttf2
 define i32 @fcmps_ogt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ogt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ogt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oge_f128:
-; CHECK: bl __getf2
 define i32 @fcmps_oge_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_oge_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __getf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_oeq_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmps_oeq_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_oeq_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_one_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmps_one_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_one_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    bl __unordtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    ccmp w19, #0, #4, eq
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"one", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ult_f128:
-; CHECK: bl __getf2
 define i32 @fcmps_ult_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ult_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __getf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ult", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ule_f128:
-; CHECK: bl __gttf2
 define i32 @fcmps_ule_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ule_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, le
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ule", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ugt_f128:
-; CHECK: bl __letf2
 define i32 @fcmps_ugt_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ugt_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __letf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ugt", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_uge_f128:
-; CHECK: bl __lttf2
 define i32 @fcmps_uge_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_uge_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __lttf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ge
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"uge", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_ueq_f128:
-; CHECK: bl __eqtf2
 define i32 @fcmps_ueq_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_ueq_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    bl __eqtf2
+; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    mov w19, w0
+; CHECK-NEXT:    bl __unordtf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    ccmp w19, #0, #4, eq
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"ueq", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-; CHECK-LABEL: fcmps_une_f128:
-; CHECK: bl __netf2
 define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 {
+; CHECK-LABEL: fcmps_une_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __netf2
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %cmp = call i1 @llvm.experimental.constrained.fcmps.f128(fp128 %a, fp128 %b, metadata !"une", metadata !"fpexcept.strict") #0
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -1603,156 +2851,280 @@ define i32 @fcmps_une_f128(fp128 %a, fp128 %b) #0 {
 
 ; Intrinsics to convert between floating-point types
 
-; CHECK-LABEL: fptrunc_f32_f64:
-; CHECK: fcvt s0, d0
 define float @fptrunc_f32_f64(double %x) #0 {
+; CHECK-LABEL: fptrunc_f32_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt s0, d0
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fptrunc_f32_f128:
-; CHECK: bl __trunctfsf2
 define float @fptrunc_f32_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptrunc_f32_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __trunctfsf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %val
 }
 
-; CHECK-LABEL: fptrunc_f64_f128:
-; CHECK: bl __trunctfdf2
 define double @fptrunc_f64_f128(fp128 %x) #0 {
+; CHECK-LABEL: fptrunc_f64_f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: fpext_f64_f32:
-; CHECK: fcvt d0, s0
 define double @fpext_f64_f32(float %x) #0 {
+; CHECK-LABEL: fpext_f64_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fcvt d0, s0
+; CHECK-NEXT:    ret
   %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0
   ret double %val
 }
 
-; CHECK-LABEL: fpext_f128_f32:
-; CHECK: bl __extendsftf2
 define fp128 @fpext_f128_f32(float %x) #0 {
+; CHECK-LABEL: fpext_f128_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __extendsftf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: fpext_f128_f64:
-; CHECK: bl __extenddftf2
 define fp128 @fpext_f128_f64(double %x) #0 {
+; CHECK-LABEL: fpext_f128_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __extenddftf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %x, metadata !"fpexcept.strict") #0
   ret fp128 %val
 }
 
-; CHECK-LABEL: sin_v1f64:
-; CHECK: bl sin
 define <1 x double> @sin_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: sin_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sin
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.sin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: cos_v1f64:
-; CHECK: bl cos
 define <1 x double> @cos_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: cos_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cos
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.cos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: tan_v1f64:
-; CHECK: bl tan
 define <1 x double> @tan_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: tan_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tan
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.tan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: asin_v1f64:
-; CHECK: bl asin
 define <1 x double> @asin_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: asin_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl asin
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.asin.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: acos_v1f64:
-; CHECK: bl acos
 define <1 x double> @acos_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: acos_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl acos
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.acos.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: atan_v1f64:
-; CHECK: bl atan
 define <1 x double> @atan_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: atan_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.atan.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: atan2_v1f64:
-; CHECK: bl atan2
 define <1 x double> @atan2_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: atan2_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl atan2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.atan2.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: sinh_v1f64:
-; CHECK: bl sinh
 define <1 x double> @sinh_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: sinh_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl sinh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.sinh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: cosh_v1f64:
-; CHECK: bl cosh
 define <1 x double> @cosh_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: cosh_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl cosh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.cosh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: tanh_v1f64:
-; CHECK: bl tanh
 define <1 x double> @tanh_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: tanh_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl tanh
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.tanh.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: pow_v1f64:
-; CHECK: bl pow
 define <1 x double> @pow_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: pow_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl pow
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.pow.v1f64(<1 x double> %x, <1 x double> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: log_v1f64:
-; CHECK: bl log
 define <1 x double> @log_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: log_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.log.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: log2_v1f64:
-; CHECK: bl log2
 define <1 x double> @log2_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: log2_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.log2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: log10_v1f64:
-; CHECK: bl log10
 define <1 x double> @log10_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: log10_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl log10
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.log10.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: exp_v1f64:
-; CHECK: bl exp
 define <1 x double> @exp_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: exp_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.exp.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
 
-; CHECK-LABEL: exp2_v1f64:
-; CHECK: bl exp2
 define <1 x double> @exp2_v1f64(<1 x double> %x, <1 x double> %y) #0 {
+; CHECK-LABEL: exp2_v1f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl exp2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
   %val = call <1 x double> @llvm.experimental.constrained.exp2.v1f64(<1 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret <1 x double> %val
 }
@@ -1918,3 +3290,7 @@ declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata,
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
 declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
 declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index 17c87a5dae4199..bfb5c67801e6c2 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to signed integer
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 3c19fca4a22aef..0dea7be5052d03 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-CVT
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
-; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 ;
 ; 32-bit float to unsigned integer
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index 20a6dd0899b40a..3037a9552bc27e 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
diff --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
index 978fe0b5ba3b3c..58591b11c184fb 100644
--- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll
+++ b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
@@ -4,6 +4,63 @@
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
+; CHECK-GI:       warning: Instruction selection used fallback path for stofp_i64_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i64_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i32_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i32_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i16_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i16_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_i8_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_i8_bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i64_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i64_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i64_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i64_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i64_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i64_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i64_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i64_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i64_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i64_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i64_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i64_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i32_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i32_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i32_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i32_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i32_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i32_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i32_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i32_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i32_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i32_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i32_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i32_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i16_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i16_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i16_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i16_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i16_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i16_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i16_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i16_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i16_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i16_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i16_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i16_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v2i8_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v2i8_v2bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v3i8_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v3i8_v3bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v4i8_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v4i8_v4bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v8i8_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v8i8_v8bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v16i8_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v16i8_v16bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for stofp_v32i8_v32bf16
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for utofp_v32i8_v32bf16
+
 define bfloat @stofp_i64_bf16(i64 %a) {
 ; CHECK-LABEL: stofp_i64_bf16:
 ; CHECK:       // %bb.0: // %entry
diff --git a/llvm/test/CodeGen/AArch64/mingw-refptr.ll b/llvm/test/CodeGen/AArch64/mingw-refptr.ll
index 306bee9f85c42e..cc9fac0506ff52 100644
--- a/llvm/test/CodeGen/AArch64/mingw-refptr.ll
+++ b/llvm/test/CodeGen/AArch64/mingw-refptr.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s
-; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \
-; RUN:     -mtriple=aarch64-w64-mingw32 2>&1| FileCheck %s --check-prefixes=GISEL,FALLBACK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-w64-mingw32 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-w64-mingw32 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 @var = external local_unnamed_addr global i32, align 4
 @dsolocalvar = external dso_local local_unnamed_addr global i32, align 4
@@ -10,10 +10,11 @@
 
 define dso_local i32 @getVar() {
 ; CHECK-LABEL: getVar:
-; CHECK:    adrp x8, .refptr.var
-; CHECK:    ldr  x8, [x8, :lo12:.refptr.var]
-; CHECK:    ldr  w0, [x8]
-; CHECK:    ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .refptr.var
+; CHECK-NEXT:    ldr x8, [x8, :lo12:.refptr.var]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @var, align 4
   ret i32 %0
@@ -21,9 +22,10 @@ entry:
 
 define dso_local i32 @getDsoLocalVar() {
 ; CHECK-LABEL: getDsoLocalVar:
-; CHECK:    adrp x8, dsolocalvar
-; CHECK:    ldr  w0, [x8, :lo12:dsolocalvar]
-; CHECK:    ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, dsolocalvar
+; CHECK-NEXT:    ldr w0, [x8, :lo12:dsolocalvar]
+; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @dsolocalvar, align 4
   ret i32 %0
@@ -31,9 +33,10 @@ entry:
 
 define dso_local i32 @getLocalVar() {
 ; CHECK-LABEL: getLocalVar:
-; CHECK:    adrp x8, localvar
-; CHECK:    ldr  w0, [x8, :lo12:localvar]
-; CHECK:    ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, localvar
+; CHECK-NEXT:    ldr w0, [x8, :lo12:localvar]
+; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @localvar, align 4
   ret i32 %0
@@ -41,9 +44,10 @@ entry:
 
 define dso_local i32 @getLocalCommon() {
 ; CHECK-LABEL: getLocalCommon:
-; CHECK:    adrp x8, localcommon
-; CHECK:    ldr  w0, [x8, :lo12:localcommon]
-; CHECK:    ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, localcommon
+; CHECK-NEXT:    ldr w0, [x8, :lo12:localcommon]
+; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @localcommon, align 4
   ret i32 %0
@@ -51,10 +55,11 @@ entry:
 
 define dso_local i32 @getExtVar() {
 ; CHECK-LABEL: getExtVar:
-; CHECK:    adrp x8, __imp_extvar
-; CHECK:    ldr  x8, [x8, :lo12:__imp_extvar]
-; CHECK:    ldr  w0, [x8]
-; CHECK:    ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, __imp_extvar
+; CHECK-NEXT:    ldr x8, [x8, :lo12:__imp_extvar]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr @extvar, align 4
   ret i32 %0
@@ -62,7 +67,8 @@ entry:
 
 define dso_local void @callFunc() {
 ; CHECK-LABEL: callFunc:
-; CHECK:    b otherFunc
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    b otherFunc
 entry:
   tail call void @otherFunc()
   ret void
@@ -70,16 +76,40 @@ entry:
 
 declare dso_local void @otherFunc()
 
-; FALLBACK-NOT: remark:{{.*}}sspFunc
 define dso_local void @sspFunc() #0 {
 ; CHECK-LABEL: sspFunc:
-; CHECK:    adrp x8, .refptr.__stack_chk_guard
-; CHECK:    ldr  x8, [x8, :lo12:.refptr.__stack_chk_guard]
-; CHECK:    ldr  x8, [x8]
-; GISEL-LABEL: sspFunc:
-; GISEL:    adrp x8, .refptr.__stack_chk_guard
-; GISEL:    ldr  x8, [x8, :lo12:.refptr.__stack_chk_guard]
-; GISEL:    ldr  x8, [x8]
+; CHECK:       .seh_proc sspFunc
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg x30, 16
+; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    adrp x8, .refptr.__stack_chk_guard
+; CHECK-NEXT:    add x0, sp, #7
+; CHECK-NEXT:    ldr x8, [x8, :lo12:.refptr.__stack_chk_guard]
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    str x8, [sp, #8]
+; CHECK-NEXT:    bl ptrUser
+; CHECK-NEXT:    adrp x8, .refptr.__stack_chk_guard
+; CHECK-NEXT:    ldr x8, [x8, :lo12:.refptr.__stack_chk_guard]
+; CHECK-NEXT:    ldr x9, [sp, #8]
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    b.ne .LBB6_2
+; CHECK-NEXT:  // %bb.1: // %entry
+; CHECK-NEXT:    .seh_startepilogue
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg x30, 16
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .seh_stackalloc 32
+; CHECK-NEXT:    .seh_endepilogue
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB6_2: // %entry
+; CHECK-NEXT:    bl __stack_chk_fail
+; CHECK-NEXT:    brk #0x1
+; CHECK-NEXT:    .seh_endfunclet
+; CHECK-NEXT:    .seh_endproc
 entry:
   %c = alloca i8, align 1
   call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %c)
@@ -102,3 +132,7 @@ attributes #0 = { sspstrong }
 ; CHECK:        .globl  .refptr.var
 ; CHECK: .refptr.var:
 ; CHECK:        .xword  var
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-GI: {{.*}}
+; CHECK-SD: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll
index 32bc5c5e63b3e1..5b9f438ed1d437 100644
--- a/llvm/test/CodeGen/AArch64/mulcmle.ll
+++ b/llvm/test/CodeGen/AArch64/mulcmle.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 %s -o - -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 %s -o - -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <1 x i64> @v1i64(<1 x i64> %a) {
 ; CHECK-SD-LABEL: v1i64:
diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll
index 977141f2b84f4f..489d46f8b0e727 100644
--- a/llvm/test/CodeGen/AArch64/overflow.ll
+++ b/llvm/test/CodeGen/AArch64/overflow.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,SDAG
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GISEL
-
+; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define zeroext i1 @saddo1.i32.unused(i32 %v1, i32 %v2, ptr %res) {
 ; CHECK-LABEL: saddo1.i32.unused:
@@ -105,19 +104,19 @@ entry:
   ret i1 %obit
 }
 define zeroext i1 @saddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) {
-; SDAG-LABEL: saddo.add.i32:
-; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    add w8, w4, #100
-; SDAG-NEXT:    subs w8, w8, #100
-; SDAG-NEXT:    cset w0, vs
-; SDAG-NEXT:    str w8, [x5]
-; SDAG-NEXT:    ret
+; CHECK-SD-LABEL: saddo.add.i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    add w8, w4, #100
+; CHECK-SD-NEXT:    subs w8, w8, #100
+; CHECK-SD-NEXT:    cset w0, vs
+; CHECK-SD-NEXT:    str w8, [x5]
+; CHECK-SD-NEXT:    ret
 ;
-; GISEL-LABEL: saddo.add.i32:
-; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    mov w0, wzr
-; GISEL-NEXT:    str w4, [x5]
-; GISEL-NEXT:    ret
+; CHECK-GI-LABEL: saddo.add.i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w0, wzr
+; CHECK-GI-NEXT:    str w4, [x5]
+; CHECK-GI-NEXT:    ret
 entry:
   %lhs = add nsw i32 %v5, 100
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %lhs, i32 -100)
@@ -128,20 +127,20 @@ entry:
 }
 
 define zeroext i1 @uaddo.add.i32(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, ptr %res) {
-; SDAG-LABEL: uaddo.add.i32:
-; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    add w8, w4, #5
-; SDAG-NEXT:    adds w8, w8, #5
-; SDAG-NEXT:    cset w0, hs
-; SDAG-NEXT:    str w8, [x5]
-; SDAG-NEXT:    ret
+; CHECK-SD-LABEL: uaddo.add.i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    add w8, w4, #5
+; CHECK-SD-NEXT:    adds w8, w8, #5
+; CHECK-SD-NEXT:    cset w0, hs
+; CHECK-SD-NEXT:    str w8, [x5]
+; CHECK-SD-NEXT:    ret
 ;
-; GISEL-LABEL: uaddo.add.i32:
-; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    adds w8, w4, #10
-; GISEL-NEXT:    cset w0, hs
-; GISEL-NEXT:    str w8, [x5]
-; GISEL-NEXT:    ret
+; CHECK-GI-LABEL: uaddo.add.i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adds w8, w4, #10
+; CHECK-GI-NEXT:    cset w0, hs
+; CHECK-GI-NEXT:    str w8, [x5]
+; CHECK-GI-NEXT:    ret
 entry:
   %lhs = add nuw i32 %v5, 5
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %lhs, i32 5)
diff --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll
index eeafbaffbcc695..55942d0e421bb9 100644
--- a/llvm/test/CodeGen/AArch64/phi.ll
+++ b/llvm/test/CodeGen/AArch64/phi.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64 -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define i8 @ti8(i1 %c, ptr %p, i8 %a, i8 %b) {
 ; CHECK-SD-LABEL: ti8:
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll
index cb52c17e2531c8..d07fcbc29806f4 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.sadd.sat.i4(i4, i4)
 declare i8 @llvm.sadd.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
index f6fb4dd5e4b417..4a0e49518517bf 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.sadd.sat.i4(i4, i4)
 declare i8 @llvm.sadd.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 29318bd28c45d4..6a4ab837fc4720 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v2i128
+
 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 3604db33d5c4b3..53fbb351954fcf 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define i16 @sext_i8_to_i16(i8 %a) {
 ; CHECK-LABEL: sext_i8_to_i16:
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll
index cf201d628b7e1e..23550d3c41cc7d 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.ssub.sat.i4(i4, i4)
 declare i8 @llvm.ssub.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
index cabd580e20d504..f08629c15f26c6 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.ssub.sat.i4(i4, i4)
 declare i8 @llvm.ssub.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 30e2a70ace0722..86a503038766c6 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v2i128
+
 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll
index ccf46e8fce2e15..e9d22c7be52efe 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.uadd.sat.i4(i4, i4)
 declare i8 @llvm.uadd.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
index d29564029544c9..5c81e3f20277a7 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.uadd.sat.i4(i4, i4)
 declare i8 @llvm.uadd.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index badd31c1c561c5..d4587c3439967a 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v2i128
+
 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/usub_sat.ll b/llvm/test/CodeGen/AArch64/usub_sat.ll
index 160e7e6607cdc3..54d7fc5a63b115 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.usub.sat.i4(i4, i4)
 declare i8 @llvm.usub.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
index a9932216dbe34c..2793aeb163c94d 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_plus.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i4 @llvm.usub.sat.i4(i4, i4)
 declare i8 @llvm.usub.sat.i8(i8, i8)
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 45418b5c648fa3..123f4280bd8ff2 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for v16i4
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v2i128
+
 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
 declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index d71aed2d17506b..809a6d6556a7be 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a)
 declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)

>From ab781629100fafae371a0ea5a9d34aef14870c1d Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Sat, 26 Oct 2024 19:21:16 +0100
Subject: [PATCH 12/12] [libc++] implement `std::flat_multimap`

fix byte include
---
 libcxx/docs/Status/Cxx23Papers.csv            |    2 +-
 libcxx/include/CMakeLists.txt                 |    2 +
 libcxx/include/__flat_map/flat_multimap.h     | 1074 +++++++++++++++++
 libcxx/include/__flat_map/sorted_equivalent.h |   31 +
 libcxx/include/flat_map                       |   21 +
 libcxx/include/module.modulemap               |    2 +
 libcxx/include/version                        |    1 +
 libcxx/modules/std/flat_map.inc               |    4 +-
 .../flat.map.syn/sorted_equivalent.pass.cpp   |   50 +
 .../flat.multimap.capacity/empty.pass.cpp     |   49 +
 .../flat.multimap.capacity/empty.verify.cpp   |   26 +
 .../flat.multimap.capacity/max_size.pass.cpp  |   78 ++
 .../flat.multimap.capacity/size.pass.cpp      |   70 ++
 .../flat.multimap.cons/alloc.pass.cpp         |   72 ++
 .../assign_initializer_list.pass.cpp          |   58 +
 .../flat.multimap.cons/compare.pass.cpp       |   93 ++
 .../flat.multimap.cons/containers.pass.cpp    |  187 +++
 .../flat.multimap.cons/copy.pass.cpp          |   70 ++
 .../flat.multimap.cons/copy_alloc.pass.cpp    |   67 +
 .../copy_assign.addressof.compile.pass.cpp    |   30 +
 .../flat.multimap.cons/copy_assign.pass.cpp   |   81 ++
 .../flat.multimap.cons/deduct.pass.cpp        |  344 ++++++
 .../flat.multimap.cons/deduct.verify.cpp      |   97 ++
 .../flat.multimap.cons/deduct_pmr.pass.cpp    |  107 ++
 .../flat.multimap.cons/default.pass.cpp       |   72 ++
 .../default_noexcept.pass.cpp                 |   59 +
 .../flat.multimap.cons/dtor_noexcept.pass.cpp |   53 +
 .../initializer_list.pass.cpp                 |  159 +++
 .../flat.multimap.cons/iter_iter.pass.cpp     |  154 +++
 .../iter_iter_stability.pass.cpp              |   66 +
 .../flat.multimap.cons/move.pass.cpp          |   89 ++
 .../flat.multimap.cons/move_alloc.pass.cpp    |   82 ++
 .../flat.multimap.cons/move_assign.pass.cpp   |   74 ++
 .../move_assign_clears.pass.cpp               |  101 ++
 .../move_assign_noexcept.pass.cpp             |  110 ++
 .../move_exceptions.pass.cpp                  |   71 ++
 .../flat.multimap.cons/move_noexcept.pass.cpp |  104 ++
 .../flat.multimap.cons/pmr.pass.cpp           |  361 ++++++
 .../flat.multimap.cons/range.pass.cpp         |  227 ++++
 .../sorted_container.pass.cpp                 |  165 +++
 .../sorted_initializer_list.pass.cpp          |  183 +++
 .../sorted_iter_iter.pass.cpp                 |  173 +++
 .../flat.multimap.erasure/erase_if.pass.cpp   |   98 ++
 .../erase_if_exceptions.pass.cpp              |  157 +++
 .../flat.multimap.iterators/iterator.pass.cpp |  105 ++
 .../iterator_comparison.pass.cpp              |  155 +++
 ...rator_concept_conformance.compile.pass.cpp |   84 ++
 ...range_concept_conformance.compile.pass.cpp |   55 +
 .../reverse_iterator.pass.cpp                 |   92 ++
 .../flat.multimap.modifiers/clear.pass.cpp    |   64 +
 .../flat.multimap.modifiers/emplace.pass.cpp  |  158 +++
 .../emplace_hint.pass.cpp                     |  228 ++++
 .../erase_iter.pass.cpp                       |  127 ++
 .../erase_iter_iter.pass.cpp                  |   99 ++
 .../erase_key.pass.cpp                        |   99 ++
 .../erase_key_transparent.pass.cpp            |  161 +++
 .../flat.multimap.modifiers/extract.pass.cpp  |   93 ++
 .../insert_cv.pass.cpp                        |   81 ++
 .../insert_initializer_list.pass.cpp          |   83 ++
 .../insert_iter_cv.pass.cpp                   |   95 ++
 .../insert_iter_iter.pass.cpp                 |  109 ++
 .../insert_iter_rv.pass.cpp                   |  103 ++
 .../insert_range.pass.cpp                     |  101 ++
 .../insert_range_stability.pass.cpp           |   65 +
 .../insert_rv.pass.cpp                        |  116 ++
 .../insert_sorted_initializer_list.pass.cpp   |   66 +
 .../insert_sorted_iter_iter.pass.cpp          |   94 ++
 .../insert_transparent.pass.cpp               |  135 +++
 .../flat.multimap.modifiers/replace.pass.cpp  |   82 ++
 .../swap_exception.pass.cpp                   |   80 ++
 .../swap_free.pass.cpp                        |   99 ++
 .../swap_member.pass.cpp                      |   97 ++
 .../flat.multimap.observers/comp.pass.cpp     |   98 ++
 .../keys_values.pass.cpp                      |   59 +
 .../contains.pass.cpp                         |   72 ++
 .../contains_transparent.pass.cpp             |   73 ++
 .../flat.multimap.operations/count.pass.cpp   |   71 ++
 .../count_transparent.pass.cpp                |   83 ++
 .../equal_range.pass.cpp                      |   81 ++
 .../equal_range_transparent.pass.cpp          |  110 ++
 .../flat.multimap.operations/find.pass.cpp    |   57 +
 .../find_transparent.pass.cpp                 |   99 ++
 .../lower_bound.pass.cpp                      |   73 ++
 .../lower_bound_transparent.pass.cpp          |  107 ++
 .../upper_bound.pass.cpp                      |   76 ++
 .../upper_bound_transparent.pass.cpp          |  106 ++
 .../flat.multimap/helpers.h                   |  389 ++++++
 .../flat.multimap/incomplete_type.pass.cpp    |   33 +
 .../flat.multimap/op_compare.pass.cpp         |  133 ++
 .../flat.multimap/types.compile.pass.cpp      |  133 ++
 90 files changed, 9949 insertions(+), 4 deletions(-)
 create mode 100644 libcxx/include/__flat_map/flat_multimap.h
 create mode 100644 libcxx/include/__flat_map/sorted_equivalent.h
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_equivalent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.verify.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/max_size.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/size.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/alloc.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/assign_initializer_list.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/compare.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/containers.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_alloc.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.verify.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct_pmr.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default_noexcept.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/dtor_noexcept.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/initializer_list.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter_stability.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_alloc.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_clears.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_noexcept.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_exceptions.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_noexcept.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/pmr.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/range.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_container.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_initializer_list.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_iter_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if_exceptions.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_comparison.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_concept_conformance.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/range_concept_conformance.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/reverse_iterator.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/clear.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace_hint.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/extract.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_cv.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_initializer_list.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_cv.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_rv.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range_stability.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_rv.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_initializer_list.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_iter_iter.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/replace.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_exception.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_free.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_member.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/comp.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/keys_values.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound_transparent.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/helpers.h
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/incomplete_type.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/op_compare.pass.cpp
 create mode 100644 libcxx/test/std/containers/container.adaptors/flat.multimap/types.compile.pass.cpp

diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv
index 4a4fbe9bec2224..bc9d4f8866a731 100644
--- a/libcxx/docs/Status/Cxx23Papers.csv
+++ b/libcxx/docs/Status/Cxx23Papers.csv
@@ -52,7 +52,7 @@
 "`P2443R1 <https://wg21.link/P2443R1>`__","``views::chunk_by``","2022-02 (Virtual)","|Complete|","18",""
 "","","","","",""
 "`P0009R18 <https://wg21.link/P0009R18>`__","mdspan: A Non-Owning Multidimensional Array Reference","2022-07 (Virtual)","|Complete|","18",""
-"`P0429R9 <https://wg21.link/P0429R9>`__","A Standard ``flat_map``","2022-07 (Virtual)","|In progress|","",""
+"`P0429R9 <https://wg21.link/P0429R9>`__","A Standard ``flat_map``","2022-07 (Virtual)","|Complete|","",""
 "`P1169R4 <https://wg21.link/P1169R4>`__","``static operator()``","2022-07 (Virtual)","|Complete|","16",""
 "`P1222R4 <https://wg21.link/P1222R4>`__","A Standard ``flat_set``","2022-07 (Virtual)","","",""
 "`P1223R5 <https://wg21.link/P1223R5>`__","``ranges::find_last()``, ``ranges::find_last_if()``, and ``ranges::find_last_if_not()``","2022-07 (Virtual)","|Complete|","19",""
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index b37b00165fd6ac..506d3ff68c869a 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -358,8 +358,10 @@ set(files
   __filesystem/space_info.h
   __filesystem/u8path.h
   __flat_map/flat_map.h
+  __flat_map/flat_multimap.h
   __flat_map/key_value_iterator.h
+  __flat_map/sorted_equivalent.h
  __flat_map/sorted_unique.h
   __format/buffer.h
   __format/concepts.h
   __format/container_adaptor.h
diff --git a/libcxx/include/__flat_map/flat_multimap.h b/libcxx/include/__flat_map/flat_multimap.h
new file mode 100644
index 00000000000000..f951a76efcaacc
--- /dev/null
+++ b/libcxx/include/__flat_map/flat_multimap.h
@@ -0,0 +1,1074 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H
+#define _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H
+
+#include "key_value_iterator.h"
+#include <__algorithm/lexicographical_compare_three_way.h>
+#include <__algorithm/min.h>
+#include <__algorithm/ranges_equal.h>
+#include <__algorithm/ranges_equal_range.h>
+#include <__algorithm/ranges_inplace_merge.h>
+#include <__algorithm/ranges_is_sorted.h>
+#include <__algorithm/ranges_lower_bound.h>
+#include <__algorithm/ranges_partition_point.h>
+#include <__algorithm/ranges_stable_sort.h>
+#include <__algorithm/ranges_unique.h>
+#include <__algorithm/ranges_upper_bound.h>
+#include <__algorithm/remove_if.h>
+#include <__assert>
+#include <__compare/synth_three_way.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/swappable.h>
+#include <__config>
+#include <__cstddef/byte.h>
+#include <__cstddef/ptrdiff_t.h>
+#include <__flat_map/key_value_iterator.h>
+#include <__flat_map/sorted_equivalent.h>
+#include <__functional/invoke.h>
+#include <__functional/is_transparent.h>
+#include <__functional/operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/distance.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/ranges_iterator_traits.h>
+#include <__iterator/reverse_iterator.h>
+#include <__memory/allocator_traits.h>
+#include <__memory/uses_allocator.h>
+#include <__memory/uses_allocator_construction.h>
+#include <__ranges/access.h>
+#include <__ranges/concepts.h>
+#include <__ranges/container_compatible_range.h>
+#include <__ranges/drop_view.h>
+#include <__ranges/from_range.h>
+#include <__ranges/ref_view.h>
+#include <__ranges/size.h>
+#include <__ranges/subrange.h>
+#include <__ranges/zip_view.h>
+#include <__type_traits/conjunction.h>
+#include <__type_traits/container_traits.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_allocator.h>
+#include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/maybe_const.h>
+#include <__utility/exception_guard.h>
+#include <__utility/move.h>
+#include <__utility/pair.h>
+#include <__utility/scope_guard.h>
+#include <__vector/vector.h>
+#include <initializer_list>
+#include <stdexcept>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Key,
+          class _Tp,
+          class _Compare         = less<_Key>,
+          class _KeyContainer    = vector<_Key>,
+          class _MappedContainer = vector<_Tp>>
+class flat_multimap {
+  template <class, class, class, class, class>
+  friend class flat_multimap;
+
+  static_assert(is_same_v<_Key, typename _KeyContainer::value_type>);
+  static_assert(is_same_v<_Tp, typename _MappedContainer::value_type>);
+  static_assert(!is_same_v<_KeyContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
+  static_assert(!is_same_v<_MappedContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
+
+  template <bool _Const>
+  using __iterator = __key_value_iterator<flat_multimap, _KeyContainer, _MappedContainer, _Const>;
+
+public:
+  // types
+  using key_type               = _Key;
+  using mapped_type            = _Tp;
+  using value_type             = pair<key_type, mapped_type>;
+  using key_compare            = __type_identity_t<_Compare>;
+  using reference              = pair<const key_type&, mapped_type&>;
+  using const_reference        = pair<const key_type&, const mapped_type&>;
+  using size_type              = size_t;
+  using difference_type        = ptrdiff_t;
+  using iterator               = __iterator<false>; // see [container.requirements]
+  using const_iterator         = __iterator<true>;  // see [container.requirements]
+  using reverse_iterator       = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using key_container_type     = _KeyContainer;
+  using mapped_container_type  = _MappedContainer;
+
+  class value_compare {
+  private:
+    key_compare __comp_;
+    _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {}
+    friend flat_multimap;
+
+  public:
+    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+      return __comp_(__x.first, __y.first);
+    }
+  };
+
+  struct containers {
+    key_container_type keys;
+    mapped_container_type values;
+  };
+
+private:
+  template <class _Allocator>
+  _LIBCPP_HIDE_FROM_ABI static constexpr bool __allocator_ctor_constraint =
+      _And<uses_allocator<key_container_type, _Allocator>, uses_allocator<mapped_container_type, _Allocator>>::value;
+
+  _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare, _Compare>;
+
+public:
+  // [flat.multimap.cons], construct/copy/destroy
+  _LIBCPP_HIDE_FROM_ABI flat_multimap() noexcept(
+      is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> &&
+      is_nothrow_default_constructible_v<_Compare>)
+      : __containers_(), __compare_() {}
+
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap&) = default;
+
+  // The copy/move constructors are not specified in the spec, which means they should be defaulted.
+  // However, the move constructor can potentially leave a moved-from object in an inconsistent
+  // state if an exception is thrown.
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other) noexcept(
+      is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> &&
+      is_nothrow_move_constructible_v<_Compare>)
+#  if _LIBCPP_HAS_EXCEPTIONS
+      try
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      : __containers_(std::move(__other.__containers_)), __compare_(std::move(__other.__compare_)) {
+    __other.clear();
+#  if _LIBCPP_HAS_EXCEPTIONS
+  } catch (...) {
+    __other.clear();
+    // gcc does not like the `throw` keyword in a conditional noexcept function
+    if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> &&
+                    is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>)) {
+      throw;
+    }
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap& __other, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_tag{},
+                      __alloc,
+                      __other.__containers_.keys,
+                      __other.__containers_.values,
+                      __other.__compare_) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other, const _Allocator& __alloc)
+#  if _LIBCPP_HAS_EXCEPTIONS
+      try
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      : flat_multimap(__ctor_uses_allocator_tag{},
+                      __alloc,
+                      std::move(__other.__containers_.keys),
+                      std::move(__other.__containers_.values),
+                      std::move(__other.__compare_)) {
+    __other.clear();
+#  if _LIBCPP_HAS_EXCEPTIONS
+  } catch (...) {
+    __other.clear();
+    throw;
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+  }
+
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+      key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare())
+      : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    __sort();
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+      const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    __sort();
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(const key_container_type& __key_cont,
+                const mapped_container_type& __mapped_cont,
+                const key_compare& __comp,
+                const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    __sort();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t,
+                key_container_type __key_cont,
+                mapped_container_type __mapped_cont,
+                const key_compare& __comp = key_compare())
+      : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted");
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t,
+                const key_container_type& __key_cont,
+                const mapped_container_type& __mapped_cont,
+                const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted");
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t,
+                const key_container_type& __key_cont,
+                const mapped_container_type& __mapped_cont,
+                const key_compare& __comp,
+                const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_multimap keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted");
+  }
+
+  _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const key_compare& __comp) : __containers_(), __compare_(__comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(const key_compare& __comp, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {}
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
+      : __containers_(), __compare_(__comp) {
+    insert(__first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert(__first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert(__first, __last);
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t __fr, _Range&& __rg)
+      : flat_multimap(__fr, std::forward<_Range>(__rg), key_compare()) {}
+
+  template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multimap(__comp) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+      sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
+      : __containers_(), __compare_(__comp) {
+    insert(sorted_equivalent, __first, __last);
+  }
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t,
+                _InputIterator __first,
+                _InputIterator __last,
+                const key_compare& __comp,
+                const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert(sorted_equivalent, __first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+      : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert(sorted_equivalent, __first, __last);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+      : flat_multimap(__il.begin(), __il.end(), __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_multimap(__il.begin(), __il.end(), __comp, __alloc) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list<value_type> __il, const _Allocator& __alloc)
+      : flat_multimap(__il.begin(), __il.end(), __alloc) {}
+
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+      : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(
+      sorted_equivalent_t, initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(sorted_equivalent_t, initializer_list<value_type> __il, const _Allocator& __alloc)
+      : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __alloc) {}
+
+  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(initializer_list<value_type> __il) {
+    clear();
+    insert(__il);
+    return *this;
+  }
+
+  // The copy/move assignment operators are not specified in the spec, which means they should be defaulted.
+  // However, the move assignment can potentially leave a moved-from object in an inconsistent
+  // state if an exception is thrown.
+  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(const flat_multimap&) = default;
+
+  _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(flat_multimap&& __other) noexcept(
+      is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> &&
+      is_nothrow_move_assignable_v<_Compare>) {
+    auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; });
+    auto __clear_self_guard  = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    __containers_            = std::move(__other.__containers_);
+    __compare_               = std::move(__other.__compare_);
+    __clear_self_guard.__complete();
+    return *this;
+  }
+
+  // iterators
+  _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept {
+    return iterator(__containers_.keys.begin(), __containers_.values.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept {
+    return const_iterator(__containers_.keys.begin(), __containers_.values.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator end() noexcept {
+    return iterator(__containers_.keys.end(), __containers_.values.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept {
+    return const_iterator(__containers_.keys.end(), __containers_.values.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); }
+  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); }
+
+  // [flat.multimap.capacity], capacity
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); }
+
+  _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); }
+
+  _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept {
+    return std::min<size_type>(__containers_.keys.max_size(), __containers_.values.max_size());
+  }
+
+  // [flat.multimap.modifiers], modifiers
+  template <class... _Args>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Args...> && is_move_constructible_v<key_type> &&
+             is_move_constructible_v<mapped_type>
+  _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) {
+    std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
+    return __search_and_emplace_with(ranges::upper_bound, std::move(__pair.first), std::move(__pair.second));
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Args...>
+  _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
+    std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
+
+    auto __prev_bigger  = __hint != cbegin() && __compare_(__pair.first, (__hint - 1)->first);
+    auto __next_smaller = __hint != cend() && __compare_(__hint->first, __pair.first);
+
+    if (!__prev_bigger && !__next_smaller) {
+      // hint correct, just emplace
+      return __emplace_exact_pos(
+          __hint.__key_iter_, __hint.__mapped_iter_, std::move(__pair.first), std::move(__pair.second));
+    } else if (__prev_bigger && !__next_smaller) {
+      // the hint position is more to the right than the key should have been.
+      // we want to emplace the element to a position as right as possible
+      // e.g. Insert new element "2" in the following range
+      // 1, 1, 2, 2, 2, 3, 4, 6
+      //                  ^
+      //                  |
+      //                 hint
+      // We want to insert "2" after the last existing "2"
+      return __search_and_emplace_with(ranges::upper_bound, std::move(__pair.first), std::move(__pair.second));
+    } else {
+      // !__prev_bigger && __next_smaller
+      // TODO: assert this condition. It is not possible that __prev_bigger && __next_smaller, otherwise the multimap
+      // is not sorted.
+
+      // the hint position is more to the left than the key should have been.
+      // we want to emplace the element to a position as left as possible
+      //  1, 1, 2, 2, 2, 3, 4, 6
+      // ^
+      // |
+      // hint
+      // We want to insert "2" before the first existing "2"
+      return __search_and_emplace_with(ranges::lower_bound, std::move(__pair.first), std::move(__pair.second));
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) {
+    return emplace_hint(__hint, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) {
+    return emplace_hint(__hint, std::move(__x));
+  }
+
+  template <class _Pp>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Pp>
+  _LIBCPP_HIDE_FROM_ABI iterator insert(_Pp&& __x) {
+    return emplace(std::forward<_Pp>(__x));
+  }
+
+  template <class _Pp>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Pp>
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _Pp&& __x) {
+    return emplace_hint(__hint, std::forward<_Pp>(__x));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+    if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
+      __reserve(__last - __first);
+    }
+    __append_sort_merge</*WasSorted = */ false>(std::move(__first), std::move(__last));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) {
+    if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
+      __reserve(__last - __first);
+    }
+
+    __append_sort_merge</*WasSorted = */ true>(std::move(__first), std::move(__last));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
+
+    __append_sort_merge</*WasSorted = */ false>(ranges::begin(__range), ranges::end(__range));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
+
+  _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list<value_type> __il) {
+    insert(sorted_equivalent, __il.begin(), __il.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI containers extract() && {
+    auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __ret   = std::move(__containers_);
+    return __ret;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+        __key_cont.size() == __mapped_cont.size(), "flat_multimap keys and mapped containers have different size");
+
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__key_cont), "Key container is not sorted");
+    auto __guard         = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    __containers_.keys   = std::move(__key_cont);
+    __containers_.values = std::move(__mapped_cont);
+    __guard.__complete();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) {
+    return __erase(__position.__key_iter_, __position.__mapped_iter_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) {
+    return __erase(__position.__key_iter_, __position.__mapped_iter_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) {
+    auto [__first, __last] = equal_range(__x);
+    auto __res             = __last - __first;
+    erase(__first, __last);
+    return __res;
+  }
+
+  template <class _Kp>
+    requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> &&
+             !is_convertible_v<_Kp &&, const_iterator>)
+  _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) {
+    auto [__first, __last] = equal_range(__x);
+    auto __res             = __last - __first;
+    erase(__first, __last);
+    return __res;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __key_it     = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_);
+    auto __mapped_it  = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_);
+    __on_failure.__complete();
+    return iterator(std::move(__key_it), std::move(__mapped_it));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __y) noexcept {
+    // warning: The spec has unconditional noexcept, which means that
+    // if any of the following functions throw an exception,
+    // std::terminate will be called
+    ranges::swap(__compare_, __y.__compare_);
+    ranges::swap(__containers_.keys, __y.__containers_.keys);
+    ranges::swap(__containers_.values, __y.__containers_.values);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void clear() noexcept {
+    __containers_.keys.clear();
+    __containers_.values.clear();
+  }
+
+  // observers
+  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; }
+  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); }
+
+  _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; }
+  _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; }
+
+  // map operations
+  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) {
+    return __find_impl(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const {
+    return __find_impl(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const {
+    auto [__first, __last] = equal_range(__x);
+    return __last - __first;
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const {
+    auto [__first, __last] = equal_range(__x);
+    return __last - __first;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const {
+    return find(__x) != end();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound<iterator>(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const {
+    return __lower_bound<const_iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) {
+    return __lower_bound<iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const {
+    return __lower_bound<const_iterator>(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound<iterator>(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const {
+    return __upper_bound<const_iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) {
+    return __upper_bound<iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const {
+    return __upper_bound<const_iterator>(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __x) {
+    return __equal_range_impl(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __x) const {
+    return __equal_range_impl(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _Kp& __x) {
+    return __equal_range_impl(*this, __x);
+  }
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _Kp& __x) const {
+    return __equal_range_impl(*this, __x);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multimap& __x, const flat_multimap& __y) {
+    return ranges::equal(__x, __y);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multimap& __x, const flat_multimap& __y) {
+    return std::lexicographical_compare_three_way(
+        __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __x, flat_multimap& __y) noexcept { __x.swap(__y); }
+
+private:
+  struct __ctor_uses_allocator_tag {
+    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default;
+  };
+  struct __ctor_uses_allocator_empty_tag {
+    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default;
+  };
+
+  template <class _Allocator, class _KeyCont, class _MappedCont, class... _CompArg>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_multimap(__ctor_uses_allocator_tag,
+                const _Allocator& __alloc,
+                _KeyCont&& __key_cont,
+                _MappedCont&& __mapped_cont,
+                _CompArg&&... __comp)
+      : __containers_{.keys = std::make_obj_using_allocator<key_container_type>(
+                          __alloc, std::forward<_KeyCont>(__key_cont)),
+                      .values = std::make_obj_using_allocator<mapped_container_type>(
+                          __alloc, std::forward<_MappedCont>(__mapped_cont))},
+        __compare_(std::forward<_CompArg>(__comp)...) {}
+
+  template <class _Allocator, class... _CompArg>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp)
+      : __containers_{.keys   = std::make_obj_using_allocator<key_container_type>(__alloc),
+                      .values = std::make_obj_using_allocator<mapped_container_type>(__alloc)},
+        __compare_(std::forward<_CompArg>(__comp)...) {}
+
+  _LIBCPP_HIDE_FROM_ABI bool __is_sorted(auto&& __key_container) const {
+    return ranges::is_sorted(__key_container, __compare_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __sort() {
+    auto __zv = ranges::views::zip(__containers_.keys, __containers_.values);
+    ranges::stable_sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); });
+  }
+
+  template <class _InputIterator, class _Sentinel>
+  _LIBCPP_HIDE_FROM_ABI size_type __append(_InputIterator __first, _Sentinel __last) {
+    size_type __num_of_appended = 0;
+    for (; __first != __last; ++__first) {
+      value_type __kv = *__first;
+      __containers_.keys.insert(__containers_.keys.end(), std::move(__kv.first));
+      __containers_.values.insert(__containers_.values.end(), std::move(__kv.second));
+      ++__num_of_appended;
+    }
+    return __num_of_appended;
+  }
+
+  template <bool _WasSorted, class _InputIterator, class _Sentinel>
+  _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_InputIterator __first, _Sentinel __last) {
+    auto __on_failure        = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    size_t __num_of_appended = __append(std::move(__first), std::move(__last));
+    if (__num_of_appended != 0) {
+      auto __zv                  = ranges::views::zip(__containers_.keys, __containers_.values);
+      auto __append_start_offset = __containers_.keys.size() - __num_of_appended;
+      auto __end                 = __zv.end();
+      auto __compare_key         = [this](const auto& __p1, const auto& __p2) {
+        return __compare_(std::get<0>(__p1), std::get<0>(__p2));
+      };
+      if constexpr (!_WasSorted) {
+        ranges::stable_sort(__zv.begin() + __append_start_offset, __end, __compare_key);
+      } else {
+        _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+            __is_sorted(__containers_.keys | ranges::views::drop(__append_start_offset)),
+            "Key container is not sorted");
+      }
+      ranges::inplace_merge(__zv.begin(), __zv.begin() + __append_start_offset, __end, __compare_key);
+    }
+    __on_failure.__complete();
+  }
+
+  template <class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) {
+    auto __it   = __self.lower_bound(__key);
+    auto __last = __self.end();
+    if (__it == __last || __self.__compare_(__key, __it->first)) {
+      return __last;
+    }
+    return __it;
+  }
+
+  template <class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
+    auto [__key_first, __key_last] = ranges::equal_range(__self.__containers_.keys, __key, __self.__compare_);
+
+    const auto __make_mapped_iter = [&](const auto& __key_iter) {
+      return __self.__containers_.values.begin() +
+             static_cast<ranges::range_difference_t<mapped_container_type>>(
+                 ranges::distance(__self.__containers_.keys.begin(), __key_iter));
+    };
+
+    using __iterator_type = ranges::iterator_t<decltype(__self)>;
+    return std::make_pair(__iterator_type(__key_first, __make_mapped_iter(__key_first)),
+                          __iterator_type(__key_last, __make_mapped_iter(__key_last)));
+  }
+
+  template <class _Res, class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) {
+    return __binary_search<_Res>(__self, ranges::lower_bound, __x);
+  }
+
+  template <class _Res, class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) {
+    return __binary_search<_Res>(__self, ranges::upper_bound, __x);
+  }
+
+  template <class _Res, class _Self, class _Fn, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __binary_search(_Self&& __self, _Fn __search_fn, _Kp& __x) {
+    auto __key_iter = __search_fn(__self.__containers_.keys, __x, __self.__compare_);
+    auto __mapped_iter =
+        __self.__containers_.values.begin() +
+        static_cast<ranges::range_difference_t<mapped_container_type>>(
+            ranges::distance(__self.__containers_.keys.begin(), __key_iter));
+
+    return _Res(std::move(__key_iter), std::move(__mapped_iter));
+  }
+
+  template <class _Fn, class _KeyArg, class... _MArgs>
+  _LIBCPP_HIDE_FROM_ABI iterator
+  __search_and_emplace_with(_Fn&& __search_fn, _KeyArg&& __key, _MArgs&&... __mapped_args) {
+    auto __key_it    = __search_fn(__containers_.keys, __key, __compare_);
+    auto __mapped_it = __containers_.values.begin() + ranges::distance(__containers_.keys.begin(), __key_it);
+
+    return __emplace_exact_pos(
+        std::move(__key_it),
+        std::move(__mapped_it),
+        std::forward<_KeyArg>(__key),
+        std::forward<_MArgs>(__mapped_args)...);
+  }
+
+  template <class _IterK, class _IterM, class _KeyArg, class... _MArgs>
+  _LIBCPP_HIDE_FROM_ABI iterator
+  __emplace_exact_pos(_IterK&& __it_key, _IterM&& __it_mapped, _KeyArg&& __key, _MArgs&&... __mapped_args) {
+    auto __on_key_failed = std::__make_exception_guard([&]() noexcept {
+      if constexpr (__container_traits<_KeyContainer>::__emplacement_has_strong_exception_safety_guarantee) {
+        // Nothing to roll back!
+      } else {
+        // we need to clear both because we don't know the state of our keys anymore
+        clear() /* noexcept */;
+      }
+    });
+    auto __key_it        = __containers_.keys.emplace(__it_key, std::forward<_KeyArg>(__key));
+    __on_key_failed.__complete();
+
+    auto __on_value_failed = std::__make_exception_guard([&]() noexcept {
+      if constexpr (!__container_traits<_MappedContainer>::__emplacement_has_strong_exception_safety_guarantee) {
+        // we need to clear both because we don't know the state of our values anymore
+        clear() /* noexcept */;
+      } else {
+        // In this case, we know the values are just like before we attempted emplacement,
+        // and we also know that the keys have been emplaced successfully. Just roll back the keys.
+#  if _LIBCPP_HAS_EXCEPTIONS
+        try {
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+          __containers_.keys.erase(__key_it);
+#  if _LIBCPP_HAS_EXCEPTIONS
+        } catch (...) {
+          // Now things are funky for real. We're failing to rollback the keys.
+          // Just give up and clear the whole thing.
+          //
+          // Also, swallow the exception that happened during the rollback and let the
+          // original value-emplacement exception propagate normally.
+          clear() /* noexcept */;
+        }
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      }
+    });
+    auto __mapped_it = __containers_.values.emplace(__it_mapped, std::forward<_MArgs>(__mapped_args)...);
+    __on_value_failed.__complete();
+
+    return iterator(std::move(__key_it), std::move(__mapped_it));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) {
+    if constexpr (requires { __containers_.keys.reserve(__size); }) {
+      __containers_.keys.reserve(__size);
+    }
+
+    if constexpr (requires { __containers_.values.reserve(__size); }) {
+      __containers_.values.reserve(__size);
+    }
+  }
+
+  template <class _KIter, class _MIter>
+  _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) {
+    auto __on_failure  = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __key_iter    = __containers_.keys.erase(__key_iter_to_remove);
+    auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove);
+    __on_failure.__complete();
+    return iterator(std::move(__key_iter), std::move(__mapped_iter));
+  }
+
+  template <class _Key2, class _Tp2, class _Compare2, class _KeyContainer2, class _MappedContainer2, class _Predicate>
+  friend typename flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type
+  erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate);
+
+  containers __containers_;
+  _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_;
+
+  struct __key_equiv {
+    _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {}
+    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+      return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x));
+    }
+    key_compare __comp_;
+  };
+};
+
+template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_multimap(_KeyContainer, _MappedContainer, _Compare = _Compare())
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     _Compare,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Allocator>
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
+           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+flat_multimap(_KeyContainer, _MappedContainer, _Allocator)
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     less<typename _KeyContainer::value_type>,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
+           uses_allocator_v<_MappedContainer, _Allocator> &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_multimap(_KeyContainer, _MappedContainer, _Compare, _Allocator)
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     _Compare,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare = _Compare())
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     _Compare,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Allocator>
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
+           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Allocator)
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     less<typename _KeyContainer::value_type>,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
+           uses_allocator_v<_MappedContainer, _Allocator> &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare, _Allocator)
+    -> flat_multimap<typename _KeyContainer::value_type,
+                     typename _MappedContainer::value_type,
+                     _Compare,
+                     _KeyContainer,
+                     _MappedContainer>;
+
+template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+flat_multimap(_InputIterator, _InputIterator, _Compare = _Compare())
+    -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
+
+template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+flat_multimap(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare())
+    -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
+
+template <ranges::input_range _Range,
+          class _Compare   = less<__range_key_type<_Range>>,
+          class _Allocator = allocator<byte>,
+          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multimap<
+    __range_key_type<_Range>,
+    __range_mapped_type<_Range>,
+    _Compare,
+    vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+    vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap<
+    __range_key_type<_Range>,
+    __range_mapped_type<_Range>,
+    less<__range_key_type<_Range>>,
+    vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+    vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+
+template <class _Key, class _Tp, class _Compare = less<_Key>>
+  requires(!__is_allocator<_Compare>::value)
+flat_multimap(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_multimap<_Key, _Tp, _Compare>;
+
+template <class _Key, class _Tp, class _Compare = less<_Key>>
+  requires(!__is_allocator<_Compare>::value)
+flat_multimap(sorted_equivalent_t, initializer_list<pair<_Key, _Tp>>, _Compare = _Compare())
+    -> flat_multimap<_Key, _Tp, _Compare>;
+
+template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Allocator>
+struct uses_allocator<flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>, _Allocator>
+    : bool_constant<uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator>> {};
+
+template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Predicate>
+_LIBCPP_HIDE_FROM_ABI typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type
+erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) {
+  // Remove the (key, value) pairs satisfying __pred via a zip view so both containers stay in sync.
+  auto __zv     = ranges::views::zip(__flat_multimap.__containers_.keys, __flat_multimap.__containers_.values);
+  auto __first  = __zv.begin();
+  auto __last   = __zv.end();
+  auto __guard  = std::__make_exception_guard([&]() noexcept { __flat_multimap.clear(); });
+  auto __it     = std::remove_if(__first, __last, [&](auto&& __zipped) -> bool {
+    using _Ref = typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::const_reference;
+    return __pred(_Ref(std::get<0>(__zipped), std::get<1>(__zipped)));
+  });
+  auto __res    = __last - __it;
+  auto __offset = __it - __first;
+
+  const auto __erase_container = [&](auto& __cont) { __cont.erase(__cont.begin() + __offset, __cont.end()); };
+
+  __erase_container(__flat_multimap.__containers_.keys);
+  __erase_container(__flat_multimap.__containers_.values);
+
+  __guard.__complete();
+  return __res;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H
diff --git a/libcxx/include/__flat_map/sorted_equivalent.h b/libcxx/include/__flat_map/sorted_equivalent.h
new file mode 100644
index 00000000000000..1db935cc6ee75e
--- /dev/null
+++ b/libcxx/include/__flat_map/sorted_equivalent.h
@@ -0,0 +1,31 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H
+#define _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct sorted_equivalent_t {
+  explicit sorted_equivalent_t() = default;
+};
+inline constexpr sorted_equivalent_t sorted_equivalent{};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+#endif // _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H
diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map
index e96af677a7eed9..84345719b8c43c 100644
--- a/libcxx/include/flat_map
+++ b/libcxx/include/flat_map
@@ -35,13 +35,34 @@ namespace std {
            class Predicate>
     typename flat_map<Key, T, Compare, KeyContainer, MappedContainer>::size_type
       erase_if(flat_map<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+
+  // [flat.multimap], class template flat_multimap
+  template<class Key, class T, class Compare = less<Key>,
+           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
+    class flat_multimap;
+
+  struct sorted_equivalent_t { explicit sorted_equivalent_t() = default; };
+  inline constexpr sorted_equivalent_t sorted_equivalent{};
+
+  template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
+           class Allocator>
+    struct uses_allocator<flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>,
+                          Allocator>;
+
+  // [flat.multimap.erasure], erasure for flat_multimap
+  template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
+           class Predicate>
+    typename flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+      erase_if(flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
 */
 
 #include <__assert> // all public C++ headers provide the assertion handler
 #include <__config>
 #include <__flat_map/flat_map.h>
+#include <__flat_map/flat_multimap.h>
 #include <__flat_map/key_value_iterator.h>
+#include <__flat_map/sorted_equivalent.h>
 #include <__flat_map/sorted_unique.h>
 #include <version>
 
 // standard required includes
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index 52e13aebc2187c..5bdf09e7177bd6 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1229,8 +1229,10 @@ module std [system] {
 
   module flat_map {
     module flat_map                       { header "__flat_map/flat_map.h" }
+    module flat_multimap                  { header "__flat_map/flat_multimap.h" }
     module key_value_iterator             { header "__flat_map/key_value_iterator.h" }
+    module sorted_equivalent              { header "__flat_map/sorted_equivalent.h" }
     module sorted_unique                  { header "__flat_map/sorted_unique.h" }
 
     header "flat_map"
     export *
diff --git a/libcxx/include/version b/libcxx/include/version
index fc57aeade9daf2..3c8e640934fced 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -479,6 +479,7 @@ __cpp_lib_void_t                                        201411L <type_traits>
+# define __cpp_lib_flat_map                             202207L
 # define __cpp_lib_format_ranges                        202207L
 // # define __cpp_lib_formatters                           202302L
 # define __cpp_lib_forward_like                         202207L
 # define __cpp_lib_invoke_r                             202106L
 # define __cpp_lib_ios_noreplace                        202207L
 # if __has_builtin(__builtin_is_implicit_lifetime)
diff --git a/libcxx/modules/std/flat_map.inc b/libcxx/modules/std/flat_map.inc
index 6a86229bceaba9..e9521749dc4a86 100644
--- a/libcxx/modules/std/flat_map.inc
+++ b/libcxx/modules/std/flat_map.inc
@@ -20,8 +20,6 @@ export namespace std {
   // [flat.map.erasure], erasure for flat_map
   using std::erase_if;
 
-#endif // _LIBCPP_STD_VER >= 23
-#if 0
   // [flat.multimap], class template flat_multimap
   using std::flat_multimap;
 
@@ -29,5 +27,5 @@ export namespace std {
   using std::sorted_equivalent_t;
 
   // [flat.multimap.erasure], erasure for flat_multimap
-#endif
+#endif // _LIBCPP_STD_VER >= 23
 } // namespace std
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_equivalent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_equivalent.pass.cpp
new file mode 100644
index 00000000000000..d9ee3fbd287b5c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_equivalent.pass.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// struct sorted_equivalent_t { explicit sorted_equivalent_t() = default; };
+// inline constexpr sorted_equivalent_t sorted_equivalent{};
+
+#include <cassert>
+#include <concepts>
+#include <flat_map>
+#include <type_traits>
+
+template <class T>
+void implicit_test(T) {}
+
+template <class T>
+concept HasImplicitDefaultCtor = requires { implicit_test<T>({}); };
+
+static_assert(std::is_default_constructible_v<std::sorted_equivalent_t>);
+static_assert(std::is_trivially_default_constructible_v<std::sorted_equivalent_t>);
+static_assert(!HasImplicitDefaultCtor<std::sorted_equivalent_t>);
+
+constexpr bool test() {
+  {
+    [[maybe_unused]] std::sorted_equivalent_t s;
+  }
+  {
+    [[maybe_unused]] std::same_as<const std::sorted_equivalent_t&> decltype(auto) s = (std::sorted_equivalent);
+  }
+  {
+    [[maybe_unused]] std::same_as<const std::sorted_equivalent_t> decltype(auto) copy = std::sorted_equivalent;
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.pass.cpp
new file mode 100644
index 00000000000000..b65b12b7bfd81b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.pass.cpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// [[nodiscard]] bool empty() const noexcept;
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using M = std::flat_multimap<int, double, std::less<int>, KeyContainer, ValueContainer>;
+  M m;
+  ASSERT_SAME_TYPE(decltype(m.empty()), bool);
+  ASSERT_NOEXCEPT(m.empty());
+  assert(m.empty());
+  assert(std::as_const(m).empty());
+  m = {{1, 1.0}, {1, 2.0}};
+  assert(!m.empty());
+  m.clear();
+  assert(m.empty());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.verify.cpp
new file mode 100644
index 00000000000000..b3cfce97b357da
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/empty.verify.cpp
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// [[nodiscard]] bool empty() const noexcept;
+
+#include <flat_map>
+
+#include "test_macros.h"
+
+int main(int, char**) {
+  std::flat_multimap<int, int> c;
+  c.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/max_size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/max_size.pass.cpp
new file mode 100644
index 00000000000000..0960c43c5a90a0
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/max_size.pass.cpp
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// size_type max_size() const noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_allocator.h"
+#include "test_macros.h"
+
+int main(int, char**) {
+  {
+    using A1 = limited_allocator<int, 10>;
+    using A2 = limited_allocator<int, 20>;
+    using C  = std::flat_multimap<int, int, std::less<int>, std::vector<int, A1>, std::vector<int, A2>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= 10);
+    LIBCPP_ASSERT(c.max_size() == 10);
+  }
+  {
+    using A1 = limited_allocator<int, 10>;
+    using A2 = limited_allocator<int, 20>;
+    using C  = std::flat_multimap<int, int, std::less<int>, std::vector<int, A2>, std::vector<int, A1>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= 10);
+    LIBCPP_ASSERT(c.max_size() == 10);
+  }
+  {
+    using A = limited_allocator<int, (size_t)-1>;
+    using C = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::vector<int, A>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C::size_type max_dist = static_cast<C::size_type>(std::numeric_limits<C::difference_type>::max());
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= max_dist);
+    LIBCPP_ASSERT(c.max_size() == max_dist);
+  }
+  {
+    typedef std::flat_multimap<char, char> C;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C::size_type max_dist = static_cast<C::size_type>(std::numeric_limits<C::difference_type>::max());
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= max_dist);
+    assert(c.max_size() <= alloc_max_size(std::allocator<char>()));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/size.pass.cpp
new file mode 100644
index 00000000000000..533f8da631fc80
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.capacity/size.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// size_type size() const noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using M = std::flat_multimap<int, char, std::less<int>, KeyContainer, ValueContainer>;
+  {
+    const M m = {{1, 'a'}, {1, 'b'}, {4, 'd'}, {5, 'e'}, {5, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 5);
+  }
+  {
+    const M m = {{1, 'a'}};
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 1);
+  }
+  {
+    const M m;
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 0);
+  }
+  {
+    M m;
+    std::size_t s = 1000;
+    for (auto i = 0u; i < s; ++i) {
+      m.emplace(i, 'a');
+    }
+    for (auto i = 0u; i < s; ++i) {
+      m.emplace(i, 'b');
+    }
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 2 * s);
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/alloc.pass.cpp
new file mode 100644
index 00000000000000..3e155eb2a1075b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/alloc.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class Allocator>
+//   explicit flat_multimap(const Allocator& a);
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const A2&>);
+  }
+  {
+    // explicit
+    using M =
+        std::flat_multimap<int,
+                           long,
+                           std::less<int>,
+                           std::vector<int, test_allocator<int>>,
+                           std::vector<long, test_allocator<long>>>;
+
+    static_assert(std::is_constructible_v<M, test_allocator<int>>);
+    static_assert(!std::is_convertible_v<test_allocator<int>, M>);
+  }
+  {
+    using A = test_allocator<short>;
+    using M =
+        std::flat_multimap<int,
+                           long,
+                           std::less<int>,
+                           std::vector<int, test_allocator<int>>,
+                           std::vector<long, test_allocator<long>>>;
+    M m(A(0, 5));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.keys().get_allocator().get_id() == 5);
+    assert(m.values().get_allocator().get_id() == 5);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/assign_initializer_list.pass.cpp
new file mode 100644
index 00000000000000..32f75daae7e383
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/assign_initializer_list.pass.cpp
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(initializer_list<value_type> il);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  {
+    M m = {{8, 8}, {10, 10}};
+    assert(m.size() == 2);
+    m                              = {{3, 0}, {1, 0}, {2, 0}, {2, 1}, {3, 1}, {4, 0}, {3, 2}, {5, 0}, {6, 0}, {5, 1}};
+    std::pair<int, int> expected[] = {{1, 0}, {2, 0}, {2, 1}, {3, 0}, {3, 1}, {3, 2}, {4, 0}, {5, 0}, {5, 1}, {6, 0}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    M m = {{10, 1}, {8, 1}};
+    assert(m.size() == 2);
+    m                                    = {{3, 2}};
+    std::pair<double, double> expected[] = {{3, 2}};
+    assert(std::ranges::equal(m, expected));
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/compare.pass.cpp
new file mode 100644
index 00000000000000..1989b8a4ff68ad
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/compare.pass.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// explicit flat_multimap(const key_compare& comp);
+// template <class Alloc>
+//   flat_multimap(const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using M1 = std::flat_multimap<int, int, C, std::vector<int, A1>, std::vector<int, A1>>;
+    using M2 = std::flat_multimap<int, int, C, std::vector<int, A1>, std::vector<int, A2>>;
+    using M3 = std::flat_multimap<int, int, C, std::vector<int, A2>, std::vector<int, A1>>;
+    static_assert(std::is_constructible_v<M1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const C&, const A2&>);
+  }
+  {
+    using C = test_less<int>;
+    auto m  = std::flat_multimap<int, char*, C>(C(3));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(3));
+  }
+  {
+    // The one-argument ctor is explicit.
+    using C = test_less<int>;
+    static_assert(std::is_constructible_v<std::flat_multimap<int, char*, C>, C>);
+    static_assert(!std::is_convertible_v<C, std::flat_multimap<int, char*, C>>);
+
+    static_assert(std::is_constructible_v<std::flat_multimap<int, char*>, std::less<int>>);
+    static_assert(!std::is_convertible_v<std::less<int>, std::flat_multimap<int, char*>>);
+  }
+  {
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    auto m   = std::flat_multimap<int, short, C, std::vector<int, A1>, std::vector<short, A2>>(C(4), A1(5));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // explicit(false)
+    using C                                                                         = test_less<int>;
+    using A1                                                                        = test_allocator<int>;
+    using A2                                                                        = test_allocator<short>;
+    std::flat_multimap<int, short, C, std::deque<int, A1>, std::deque<short, A2>> m = {C(4), A1(5)};
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // If an allocator is given, it must be usable by both containers.
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, std::vector<int, A>>;
+    static_assert(std::is_constructible_v<M, std::less<>>);
+    static_assert(!std::is_constructible_v<M, std::less<>, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::less<>, A>);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/containers.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/containers.pass.cpp
new file mode 100644
index 00000000000000..17ee3c3864b1bb
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/containers.pass.cpp
@@ -0,0 +1,187 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(key_container_type key_cont, mapped_container_type mapped_cont,
+//           const key_compare& comp = key_compare());
+// template<class Allocator>
+//   flat_multimap(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+//            const Allocator& a);
+// template<class Alloc>
+//   flat_multimap(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+//            const key_compare& comp, const Alloc& a);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+struct P {
+  int first;
+  int second;
+  template <class T, class U>
+  bool operator==(const std::pair<T, U>& rhs) const {
+    return MoveOnly(first) == rhs.first && MoveOnly(second) == rhs.second;
+  }
+};
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, const V1&, const V1&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const V1&, const V1&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const V1&, const V2&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const V2&, const V1&, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, const V1&, const V1&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const V1&, const V1&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const V1&, const V2&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const V2&, const V1&, const C&, const A2&>);
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type)
+    using M                         = std::flat_multimap<int, char>;
+    std::vector<int> ks             = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::vector<char> vs            = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m                          = M(ks, vs);
+    std::pair<int, char> expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+    assert(std::ranges::equal(m, expected));
+
+    // explicit(false)
+    M m2 = {ks, vs};
+    assert(m2 == m);
+
+    m = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type)
+    // move-only
+    P expected[] = {{3, 3}, {3, 2}, {2, 1}, {1, 4}};
+    using Ks     = std::deque<int, min_allocator<int>>;
+    using Vs     = std::vector<MoveOnly, min_allocator<MoveOnly>>;
+    using M      = std::flat_multimap<int, MoveOnly, std::greater<int>, Ks, Vs>;
+    Ks ks        = {1, 3, 3, 2};
+    Vs vs;
+    vs.push_back(4);
+    vs.push_back(3);
+    vs.push_back(2);
+    vs.push_back(1);
+    auto m = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert(std::ranges::equal(m, expected, std::equal_to<>()));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type)
+    // container's allocators are used
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    auto m  = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    std::pair<int, int> expected[] = {{1, 1}, {1, 1}, {1, 1}, {2, 2}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {3, 3}};
+    assert(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A(5));
+    assert(m.values().get_allocator() == A(6));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type, key_compare)
+    using C                         = test_less<int>;
+    using M                         = std::flat_multimap<int, char, C>;
+    std::vector<int> ks             = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::vector<char> vs            = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m                          = M(ks, vs, C(4));
+    std::pair<int, char> expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+    assert(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(4));
+
+    // explicit(false)
+    M m2 = {ks, vs, C(4)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type, const Allocator&)
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    auto m  = M(ks, vs, A(4)); // replaces the allocators
+    assert(!ks.empty());       // it was an lvalue above
+    assert(!vs.empty());       // it was an lvalue above
+    std::pair<int, int> expected[] = {{1, 1}, {1, 1}, {1, 1}, {2, 2}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {3, 3}};
+    assert(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A(4));
+    assert(m.values().get_allocator() == A(4));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type, const Allocator&)
+    // explicit(false)
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    M m     = {ks, vs, A(4)}; // implicit ctor
+    assert(!ks.empty());      // it was an lvalue above
+    assert(!vs.empty());      // it was an lvalue above
+    std::pair<int, int> expected[] = {{1, 1}, {1, 1}, {1, 1}, {2, 2}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {3, 3}};
+    assert(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A(4));
+    assert(m.values().get_allocator() == A(4));
+  }
+  {
+    // flat_multimap(key_container_type, mapped_container_type, key_compare, const Allocator&)
+    using C                         = test_less<int>;
+    using A                         = test_allocator<int>;
+    using M                         = std::flat_multimap<int, int, C, std::vector<int, A>, std::vector<int, A>>;
+    std::vector<int, A> ks          = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::vector<int, A> vs          = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m                          = M(ks, vs, C(4), A(5));
+    std::pair<int, char> expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+    assert(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A(5));
+    assert(m.values().get_allocator() == A(5));
+
+    // explicit(false)
+    M m2 = {ks, vs, C(4), A(5)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+    assert(m2.keys().get_allocator() == A(5));
+    assert(m2.values().get_allocator() == A(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy.pass.cpp
new file mode 100644
index 00000000000000..0e6d12cd3c5699
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(const flat_multimap& m);
+
+#include <cassert>
+#include <flat_map>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 1, 3, 3, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({2, 2, 1, 1, 1}, test_allocator<char>(7));
+    using M = std::flat_multimap<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = mo;
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(6));
+    assert(m.values().get_allocator() == test_allocator<char>(7));
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+  {
+    using C  = test_less<int>;
+    using Ks = std::vector<int, other_allocator<int>>;
+    using Vs = std::vector<char, other_allocator<char>>;
+    auto ks  = Ks({1, 3, 5, 5, 5, 5}, other_allocator<int>(6));
+    auto vs  = Vs({2, 2, 5, 5, 5, 1}, other_allocator<char>(7));
+    using M  = std::flat_multimap<int, char, C, Ks, Vs>;
+    auto mo  = M(Ks(ks, other_allocator<int>(6)), Vs(vs, other_allocator<int>(7)), C(5));
+    auto m   = mo;
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == other_allocator<int>(-2));
+    assert(m.values().get_allocator() == other_allocator<char>(-2));
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == other_allocator<int>(6));
+    assert(mo.values().get_allocator() == other_allocator<char>(7));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_alloc.pass.cpp
new file mode 100644
index 00000000000000..3047c004d42e93
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_alloc.pass.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(const flat_multimap&, const allocator_type&);
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, const M1&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const M1&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const M2&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const M3&, const A2&>);
+  }
+  {
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 3, 3, 5, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({2, 2, 1, 1, 1}, test_allocator<char>(7));
+    using M = std::flat_multimap<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = M(mo, test_allocator<int>(3));
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(3));
+    assert(m.values().get_allocator() == test_allocator<char>(3));
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.addressof.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.addressof.compile.pass.cpp
new file mode 100644
index 00000000000000..233a9c68593181
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(const flat_multimap& s);
+
+// Validate whether the container can be copy-assigned (move-assigned, swapped)
+// with an ADL-hijacking operator&
+
+#include <flat_map>
+#include <utility>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::flat_multimap<operator_hijacker, operator_hijacker> so;
+  std::flat_multimap<operator_hijacker, operator_hijacker> s;
+  s = so;
+  s = std::move(so);
+  swap(s, so);
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.pass.cpp
new file mode 100644
index 00000000000000..3dd7ebdd388714
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/copy_assign.pass.cpp
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(const flat_multimap& m);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // test_allocator is not propagated
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 1, 3, 3, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({1, 2, 3, 4, 5}, test_allocator<char>(7));
+    using M = std::flat_multimap<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = M({{3, 3}, {4, 4}, {5, 5}}, C(3), test_allocator<int>(2));
+    m       = mo;
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(2));
+    assert(m.values().get_allocator() == test_allocator<char>(2));
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+  {
+    // other_allocator is propagated
+    using C  = test_less<int>;
+    using Ks = std::vector<int, other_allocator<int>>;
+    using Vs = std::vector<char, other_allocator<char>>;
+    auto ks  = Ks({1, 1, 3, 3, 5}, other_allocator<int>(6));
+    auto vs  = Vs({2, 1, 3, 2, 1}, other_allocator<char>(7));
+    using M  = std::flat_multimap<int, char, C, Ks, Vs>;
+    auto mo  = M(Ks(ks, other_allocator<int>(6)), Vs(vs, other_allocator<int>(7)), C(5));
+    auto m   = M({{3, 3}, {4, 4}, {5, 5}}, C(3), other_allocator<int>(2));
+    m        = mo;
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == other_allocator<int>(6));
+    assert(m.values().get_allocator() == other_allocator<char>(7));
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == other_allocator<int>(6));
+    assert(mo.values().get_allocator() == other_allocator<char>(7));
+  }
+  {
+    // self-assignment
+    using M = std::flat_multimap<int, int>;
+    M m     = {{1, 1}, {3, 4}};
+    m       = static_cast<const M&>(m);
+    assert((m == M{{1, 1}, {3, 4}}));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.pass.cpp
new file mode 100644
index 00000000000000..b10ddf03acde78
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.pass.cpp
@@ -0,0 +1,344 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <deque>
+#include <initializer_list>
+#include <list>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "deduction_guides_sfinae_checks.h"
+#include "test_allocator.h"
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test_copy() { // CTAD from a (flat_multimap[, allocator]) argument must deduce the source's exact type
+  {
+    std::flat_multimap<long, short> source = {{1, 2}, {1, 3}};
+    std::flat_multimap s(source); // deduced type equals decltype(source), per the ASSERT below
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+  {
+    std::flat_multimap<long, short, std::greater<long>> source = {{1, 2}, {1, 3}};
+    std::flat_multimap s{source}; // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+  {
+    std::flat_multimap<long, short, std::greater<long>> source = {{1, 2}, {1, 3}};
+    std::flat_multimap s(source, std::allocator<int>()); // allocator-extended copy still deduces the source type
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+}
+
+void test_containers() { // CTAD from (KeyContainer, MappedContainer[, allocator]) deduces Key/T and keeps the container types
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, 2, 2, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 3, 4, 5, 3, 4}, test_allocator<int>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({1, 1, 2, 2, 2, 3, INT_MAX}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({1, 3, 2, 4, 5, 4, 3}, test_allocator<int>(0, 43));
+  const std::pair<int, short> expected[] = {{1, 1}, {1, 3}, {2, 2}, {2, 4}, {2, 5}, {3, 4}, {INT_MAX, 3}}; // equal keys keep their original relative order
+  {
+    std::flat_multimap s(ks, vs);
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42); // containers copied in, carrying their own allocators
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  {
+    std::flat_multimap s(std::sorted_equivalent, sorted_ks, sorted_vs); // tag form: inputs are already sorted
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  {
+    std::flat_multimap s(ks, vs, test_allocator<long>(0, 44)); // allocator-extended: both containers rebuilt with id 44
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+  {
+    std::flat_multimap s(std::sorted_equivalent, sorted_ks, sorted_vs, test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+}
+
+void test_containers_compare() { // same as test_containers, but with an explicit comparator argument
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, 2, 2, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 3, 4, 5, 3, 4}, test_allocator<int>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({INT_MAX, 3, 2, 2, 2, 1, 1}, test_allocator<int>(0, 42)); // descending, to match greater<int>
+  std::deque<short, test_allocator<short>> sorted_vs({3, 4, 2, 4, 5, 1, 3}, test_allocator<int>(0, 43));
+  const std::pair<int, short> expected[] = {{INT_MAX, 3}, {3, 4}, {2, 2}, {2, 4}, {2, 5}, {1, 1}, {1, 3}};
+  {
+    std::flat_multimap s(ks, vs, std::greater<int>()); // comparator type is deduced from the argument
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  {
+    std::flat_multimap s(std::sorted_equivalent, sorted_ks, sorted_vs, std::greater<int>());
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  {
+    std::flat_multimap s(ks, vs, std::greater<int>(), test_allocator<long>(0, 44)); // comparator + allocator
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+  {
+    std::flat_multimap s(
+        std::sorted_equivalent, sorted_ks, sorted_vs, std::greater<int>(), test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+}
+
+void test_iter_iter() { // CTAD from an iterator pair: Key/T come from the iterators' pair value_type
+  const P arr[]          = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const P sorted_arr[]   = {{1, 1L}, {1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  const PC arrc[]        = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; // pair<const int, long>: const is stripped from the deduced Key
+  const PC sorted_arrc[] = {{1, 1L}, {1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  {
+    std::flat_multimap m(std::begin(arr), std::end(arr));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::begin(arrc), std::end(arrc));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, std::begin(sorted_arr), std::end(sorted_arr));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, std::begin(sorted_arrc), std::end(sorted_arrc));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap<int, short> mo;
+    std::flat_multimap m(mo.begin(), mo.end()); // deducing from a flat_multimap's own iterators round-trips the type
+    ASSERT_SAME_TYPE(decltype(m), decltype(mo));
+  }
+  {
+    std::flat_multimap<int, short> mo;
+    std::flat_multimap m(mo.cbegin(), mo.cend()); // same via const_iterators
+    ASSERT_SAME_TYPE(decltype(m), decltype(mo));
+  }
+}
+
+void test_iter_iter_compare() { // iterator-pair CTAD with an explicit comparator argument
+  const P arr[]          = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const P sorted_arr[]   = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}, {1, 1L}}; // descending, to match greater
+  const PC arrc[]        = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const PC sorted_arrc[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}, {1, 1L}};
+  using C                = std::greater<long long>; // deduced comparator is exactly the argument type, not greater<int>
+  {
+    std::flat_multimap m(std::begin(arr), std::end(arr), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::begin(arrc), std::end(arrc), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, std::begin(sorted_arr), std::end(sorted_arr), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, std::begin(sorted_arrc), std::end(sorted_arrc), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap<int, short> mo;
+    std::flat_multimap m(mo.begin(), mo.end(), C()); // Key/T from the iterators, comparator from the argument
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, short, C>);
+  }
+  {
+    std::flat_multimap<int, short> mo;
+    std::flat_multimap m(mo.cbegin(), mo.cend(), C());
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, short, C>);
+  }
+}
+
+void test_initializer_list() { // CTAD from a braced list of pairs
+  const P sorted_arr[] = {{1, 1L}, {1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  {
+    std::flat_multimap m{std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; // first element spelled std::pair so the list's element type is deducible
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, {std::pair{1, 1L}, {1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}});
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+}
+
+void test_initializer_list_compare() { // initializer_list CTAD with an explicit comparator argument
+  const P sorted_arr[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}, {1, 1L}}; // descending, to match greater
+  using C              = std::greater<long long>;
+  {
+    std::flat_multimap m({std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}, C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  {
+    std::flat_multimap m(std::sorted_equivalent, {std::pair{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}, {1, 1L}}, C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_multimap<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+}
+
+void test_from_range() { // CTAD with std::from_range: containers default to std::vector (rebound to the allocator, if given)
+  std::list<std::pair<int, short>> r     = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}};
+  const std::pair<int, short> expected[] = {{1, 1}, {1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}};
+  {
+    std::flat_multimap s(std::from_range, r);
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::less<int>>); // default vector containers
+    assert(std::ranges::equal(s, expected));
+  }
+  {
+    std::flat_multimap s(std::from_range, r, test_allocator<long>(0, 42));
+    ASSERT_SAME_TYPE(
+        decltype(s),
+        std::flat_multimap<int,
+                           short,
+                           std::less<int>,
+                           std::vector<int, test_allocator<int>>,
+                           std::vector<short, test_allocator<short>>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 42);
+  }
+}
+
+void test_from_range_compare() { // std::from_range CTAD with an explicit comparator argument
+  std::list<std::pair<int, short>> r     = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}};
+  const std::pair<int, short> expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}, {1, 1}}; // descending, to match greater<int>
+  {
+    std::flat_multimap s(std::from_range, r, std::greater<int>());
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, short, std::greater<int>>);
+    assert(std::ranges::equal(s, expected));
+  }
+  {
+    std::flat_multimap s(std::from_range, r, std::greater<int>(), test_allocator<long>(0, 42));
+    ASSERT_SAME_TYPE(
+        decltype(s),
+        std::flat_multimap<int,
+                           short,
+                           std::greater<int>,
+                           std::vector<int, test_allocator<int>>,
+                           std::vector<short, test_allocator<short>>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 42);
+  }
+}
+
+int main(int, char**) {
+  // Each test function also tests the sorted_equivalent-prefixed and allocator-suffixed overloads.
+  test_copy();
+  test_containers();
+  test_containers_compare();
+  test_iter_iter();
+  test_iter_iter_compare();
+  test_initializer_list();
+  test_initializer_list_compare();
+  test_from_range();
+  test_from_range_compare();
+
+  AssociativeContainerDeductionGuidesSfinaeAway<std::flat_multimap, std::flat_multimap<int, short>>(); // shared negative CTAD cases
+  {
+    std::flat_multimap s = {std::make_pair(1, 'a')}; // flat_multimap(initializer_list<pair<int, char>>)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, char>);
+    assert(s.size() == 1);
+  }
+  {
+    using M = std::flat_multimap<int, short>;
+    M m;
+    std::flat_multimap s = {std::make_pair(m, m)}; // flat_multimap(initializer_list<pair<M, M>>)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<M, M>); // a flat_multimap itself can be the Key/T
+    assert(s.size() == 1);
+    assert(s.find(m)->second == m);
+  }
+
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {1, 1}, {3, 3}};
+    std::flat_multimap s          = {source, source + 3}; // flat_multimap(InputIterator, InputIterator)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, int>);
+    assert(s.size() == 3); // duplicate keys are kept
+  }
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {1, 1}, {3, 3}};
+    std::flat_multimap s{source, source + 3}; // flat_multimap(InputIterator, InputIterator)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_multimap<int, int>);
+    assert(s.size() == 3);
+  }
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {1, 2}, {3, 3}};
+    std::flat_multimap s{
+        std::sorted_equivalent, source, source + 3}; // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator)
+    static_assert(std::is_same_v<decltype(s), std::flat_multimap<int, int>>);
+    assert(s.size() == 3);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.verify.cpp
new file mode 100644
index 00000000000000..abe5d4b7a8f72f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct.verify.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// Test CTAD on cases where deduction should fail.
+
+#include <flat_map>
+#include <functional>
+#include <memory>
+#include <utility>
+#include <vector>
+
+struct NotAnAllocator { // comparable, but satisfies no allocator requirements; CTAD must reject it
+  friend bool operator<(NotAnAllocator, NotAnAllocator) { return false; }
+};
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test() { // every case below must fail CTAD; diagnostics are checked by clang -verify (note: the mailing list mangled '@-1' into ' at -1'; restored here)
+  {
+    // cannot deduce Key and T from just (KeyContainer), even if it's a container of pairs
+    std::vector<std::pair<int, int>> v;
+    std::flat_multimap s(v);
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (KeyContainer, Allocator)
+    std::vector<int> v;
+    std::flat_multimap s(v, std::allocator<std::pair<const int, int>>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce Key and T from nothing
+    std::flat_multimap m;
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Compare)
+    std::flat_multimap m(std::less<int>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Compare, Allocator)
+    std::flat_multimap m(std::less<int>{}, std::allocator<PC>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Allocator)
+    std::flat_multimap m(std::allocator<PC>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot convert from some arbitrary unrelated type
+    NotAnAllocator a;
+    std::flat_multimap m(a);
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_multimap m{{1, 1L}, {2, 2L}, {3, 3L}};
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_multimap m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less<int>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_multimap m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less<int>(), std::allocator<PC>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_multimap m({{1, 1L}, {2, 2L}, {3, 3L}}, std::allocator<PC>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // since we have parens, not braces, this deliberately does not find the initializer_list constructor
+    std::flat_multimap m(P{1, 1L});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+  {
+    // since we have parens, not braces, this deliberately does not find the initializer_list constructor
+    std::flat_multimap m(PC{1, 1L});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_multimap'}}}}
+  }
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct_pmr.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct_pmr.pass.cpp
new file mode 100644
index 00000000000000..1955a8806631b5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/deduct_pmr.pass.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: availability-pmr-missing
+
+// <flat_map>
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <deque>
+#include <initializer_list>
+#include <list>
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "test_allocator.h"
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test_containers() { // CTAD from moved pmr containers; the trailing memory_resource* is the deduced allocator
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, 2, 2, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 3, 4, 5, 3, 4}, test_allocator<int>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({1, 1, 2, 2, 2, 3, INT_MAX}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({1, 3, 2, 4, 5, 4, 3}, test_allocator<int>(0, 43));
+  const std::pair<int, short> expected[] = {{1, 1}, {1, 3}, {2, 2}, {2, 4}, {2, 5}, {3, 4}, {INT_MAX, 3}};
+  {
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(ks.begin(), ks.end(), &mr);
+    std::pmr::deque<short> pvs(vs.begin(), vs.end(), &mr);
+    std::flat_multimap s(std::move(pks), std::move(pvs), &mr2); // allocator-extended move: result uses mr2, per asserts below
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_multimap<int, short, std::less<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+  {
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(sorted_ks.begin(), sorted_ks.end(), &mr);
+    std::pmr::deque<short> pvs(sorted_vs.begin(), sorted_vs.end(), &mr);
+    std::flat_multimap s(std::sorted_equivalent, std::move(pks), std::move(pvs), &mr2); // tag form: inputs pre-sorted
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_multimap<int, short, std::less<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+}
+
+void test_containers_compare() { // pmr CTAD with an explicit comparator argument
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, 2, 2, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 3, 4, 5, 3, 4}, test_allocator<int>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({INT_MAX, 3, 2, 2, 2, 1, 1}, test_allocator<int>(0, 42)); // descending, to match greater<int>
+  std::deque<short, test_allocator<short>> sorted_vs({3, 4, 2, 4, 5, 1, 3}, test_allocator<int>(0, 43));
+  const std::pair<int, short> expected[] = {{INT_MAX, 3}, {3, 4}, {2, 2}, {2, 4}, {2, 5}, {1, 1}, {1, 3}};
+
+  {
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(ks.begin(), ks.end(), &mr);
+    std::pmr::deque<short> pvs(vs.begin(), vs.end(), &mr);
+    std::flat_multimap s(std::move(pks), std::move(pvs), std::greater<int>(), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_multimap<int, short, std::greater<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+  {
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(sorted_ks.begin(), sorted_ks.end(), &mr);
+    std::pmr::deque<short> pvs(sorted_vs.begin(), sorted_vs.end(), &mr);
+    std::flat_multimap s(std::sorted_equivalent, std::move(pks), std::move(pvs), std::greater<int>(), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_multimap<int, short, std::greater<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+}
+
+int main(int, char**) { // pmr CTAD coverage: plain and comparator-taking overloads
+  test_containers();
+  test_containers_compare();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default.pass.cpp
new file mode 100644
index 00000000000000..c910f748d95fed
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap();
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+struct DefaultCtableComp { // comparator that records whether its default constructor ran
+  explicit DefaultCtableComp() { default_constructed_ = true; }
+  bool operator()(int, int) const { return false; } // all keys compare equivalent
+  bool default_constructed_ = false;
+};
+
+int main(int, char**) { // default construction leaves the adaptor empty
+  {
+    std::flat_multimap<int, char*> m;
+    assert(m.empty());
+  }
+  {
+    // explicit(false)
+    std::flat_multimap<int, char*> m = {};
+    assert(m.empty());
+  }
+  {
+    std::flat_multimap<int, char*, DefaultCtableComp, std::deque<int, min_allocator<int>>> m;
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp().default_constructed_); // the comparator was default-constructed
+  }
+  {
+    using A1 = explicit_allocator<int>;
+    using A2 = explicit_allocator<char*>;
+    {
+      std::flat_multimap<int, char*, DefaultCtableComp, std::vector<int, A1>, std::vector<char*, A2>> m;
+      assert(m.empty());
+      assert(m.key_comp().default_constructed_);
+    }
+    {
+      A1 a1;
+      std::flat_multimap<int, int, DefaultCtableComp, std::vector<int, A1>, std::vector<int, A1>> m(a1);
+      assert(m.empty());
+      assert(m.key_comp().default_constructed_);
+    }
+  }
+  {
+    // If an allocator is given, it must be usable by both containers.
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, std::vector<int, A>>;
+    static_assert(std::is_constructible_v<M>);
+    static_assert(!std::is_constructible_v<M, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, A>);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default_noexcept.pass.cpp
new file mode 100644
index 00000000000000..298945d1b4b607
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/default_noexcept.pass.cpp
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap()
+//    noexcept(
+//        is_nothrow_default_constructible_v<key_container_type> &&
+//        is_nothrow_default_constructible_v<mapped_container_type> &&
+//        is_nothrow_default_constructible_v<key_compare>);
+
+// This tests a conforming extension
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+struct ThrowingCtorComp { // comparator whose default constructor is potentially throwing
+  ThrowingCtorComp() noexcept(false) {}
+  bool operator()(const auto&, const auto&) const { return false; }
+};
+
+int main(int, char**) { // default-ctor noexcept follows the containers' and comparator's nothrow-default-constructibility
+#if defined(_LIBCPP_VERSION) // the nothrow cases are a conforming extension (see header comment)
+  {
+    using C = std::flat_multimap<MoveOnly, MoveOnly>;
+    static_assert(std::is_nothrow_default_constructible_v<C>);
+  }
+  {
+    using C =
+        std::flat_multimap<MoveOnly, MoveOnly, std::less<MoveOnly>, std::vector<MoveOnly, test_allocator<MoveOnly>>>;
+    static_assert(std::is_nothrow_default_constructible_v<C>);
+  }
+#endif // _LIBCPP_VERSION
+  {
+    using C =
+        std::flat_multimap<MoveOnly, MoveOnly, std::less<MoveOnly>, std::vector<MoveOnly, other_allocator<MoveOnly>>>;
+    static_assert(!std::is_nothrow_default_constructible_v<C>);
+    C c; // still constructible, just not nothrow
+  }
+  {
+    using C = std::flat_multimap<MoveOnly, MoveOnly, ThrowingCtorComp>;
+    static_assert(!std::is_nothrow_default_constructible_v<C>);
+    C c;
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/dtor_noexcept.pass.cpp
new file mode 100644
index 00000000000000..5d729fecb81f24
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/dtor_noexcept.pass.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// ~flat_multimap();
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+struct ThrowingDtorComp { // comparator with a potentially-throwing destructor
+  bool operator()(const auto&, const auto&) const;
+  ~ThrowingDtorComp() noexcept(false);
+};
+
+int main(int, char**) { // destruction is nothrow unless a member's destructor may throw
+  {
+    using C = std::flat_multimap<MoveOnly, MoveOnly>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+  {
+    using V = std::vector<MoveOnly, test_allocator<MoveOnly>>;
+    using C = std::flat_multimap<MoveOnly, MoveOnly, std::less<MoveOnly>, V, V>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+  {
+    using V = std::deque<MoveOnly, other_allocator<MoveOnly>>;
+    using C = std::flat_multimap<MoveOnly, MoveOnly, std::greater<MoveOnly>, V, V>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+#if defined(_LIBCPP_VERSION) // libc++-specific: a throwing comparator destructor propagates to the adaptor
+  {
+    using C = std::flat_multimap<MoveOnly, MoveOnly, ThrowingDtorComp>;
+    static_assert(!std::is_nothrow_destructible_v<C>);
+  }
+#endif // _LIBCPP_VERSION
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/initializer_list.pass.cpp
new file mode 100644
index 00000000000000..8e89192ec0ea1f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/initializer_list.pass.cpp
@@ -0,0 +1,159 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(initializer_list<value_type> il, const key_compare& comp = key_compare());
+// template<class Alloc>
+//    flat_multimap(initializer_list<value_type> il, const Alloc& a);
+// template<class Alloc>
+//    flat_multimap(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+#include "../../../test_compare.h"
+
+struct DefaultCtableComp {
+  explicit DefaultCtableComp() { default_constructed_ = true; }
+  bool operator()(int, int) const { return false; }
+  bool default_constructed_ = false;
+};
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    using IL = std::initializer_list<std::pair<int, int>>;
+    static_assert(std::is_constructible_v<M1, IL, const A1&>);
+    static_assert(!std::is_constructible_v<M1, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M2, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M3, IL, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, IL, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, IL, const C&, const A2&>);
+  }
+
+  {
+    // initializer_list<value_type> needs to match exactly
+    using M = std::flat_multimap<int, short>;
+    using C = typename M::key_compare;
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, C>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, C, std::allocator<int>>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, C>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, C, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, C>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, C, std::allocator<int>>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, std::allocator<int>>);
+  }
+
+  std::pair<int, short> expected[] = {{1, 1}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {5, 2}};
+  {
+    // flat_multimap(initializer_list<value_type>);
+    using M                                         = std::flat_multimap<int, short>;
+    std::initializer_list<std::pair<int, short>> il = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    M m(il);
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // flat_multimap(initializer_list<value_type>);
+    // explicit(false)
+    using M = std::flat_multimap<int, short>;
+    M m     = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // flat_multimap(initializer_list<value_type>);
+    using M = std::flat_multimap<int, short, std::greater<int>, std::deque<int, min_allocator<int>>>;
+    M m     = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 6));
+  }
+  {
+    using A = explicit_allocator<int>;
+    {
+      // flat_multimap(initializer_list<value_type>);
+      // different comparator
+      using M = std::flat_multimap<int, int, DefaultCtableComp, std::vector<int, A>, std::deque<int, A>>;
+      M m     = {{1, 1}, {2, 2}, {3, 3}};
+      assert(m.size() == 3);
+
+      std::pair<int, int> expected1[] = {{1, 1}, {2, 2}, {3, 3}};
+      assert(std::ranges::equal(m, expected1));
+      assert(m.key_comp().default_constructed_);
+    }
+    {
+      // flat_multimap(initializer_list<value_type>, const Allocator&);
+      using M = std::flat_multimap<int, int, std::greater<int>, std::deque<int, A>, std::vector<int, A>>;
+      A a;
+      M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, a);
+      assert(std::equal(m.rbegin(), m.rend(), expected, expected + 6));
+    }
+  }
+  {
+    // flat_multimap(initializer_list<value_type>, const key_compare&);
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, short, C>;
+    auto m  = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10));
+    assert(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(10));
+
+    // explicit(false)
+    M m2 = {{{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(10));
+  }
+  {
+    // flat_multimap(initializer_list<value_type>, const key_compare&);
+    // Sorting uses the comparator that was passed in
+    using M = std::flat_multimap<int, short, std::function<bool(int, int)>, std::deque<int, min_allocator<int>>>;
+    auto m  = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, std::greater<int>());
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 6));
+    assert(m.key_comp()(2, 1) == true);
+  }
+  {
+    // flat_multimap(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+    using A = explicit_allocator<int>;
+    using M = std::flat_multimap<int, int, std::greater<int>, std::deque<int, A>, std::vector<int, A>>;
+    A a;
+    M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, {}, a);
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 6));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter.pass.cpp
new file mode 100644
index 00000000000000..c9c5e6c99d1c88
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter.pass.cpp
@@ -0,0 +1,154 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   flat_multimap(InputIterator first, InputIterator last, const key_compare& comp = key_compare());
+// template<class InputIterator, class Allocator>
+//   flat_multimap(InputIterator first, InputIterator last, const Allocator& a);
+// template<class InputIterator, class Allocator>
+//   flat_multimap(InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C     = test_less<int>;
+    using A1    = test_allocator<int>;
+    using A2    = other_allocator<int>;
+    using V1    = std::vector<int, A1>;
+    using V2    = std::vector<int, A2>;
+    using M1    = std::flat_multimap<int, int, C, V1, V1>;
+    using M2    = std::flat_multimap<int, int, C, V1, V2>;
+    using M3    = std::flat_multimap<int, int, C, V2, V1>;
+    using Iter1 = typename M1::iterator;
+    using Iter2 = typename M2::iterator;
+    using Iter3 = typename M3::iterator;
+    static_assert(std::is_constructible_v<M1, Iter1, Iter1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, Iter1, Iter1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, Iter2, Iter2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, Iter3, Iter3, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, Iter1, Iter1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, Iter1, Iter1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, Iter2, Iter2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, Iter3, Iter3, const C&, const A2&>);
+  }
+
+  using P      = std::pair<int, short>;
+  P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+  P expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+  {
+    // flat_multimap(InputIterator , InputIterator)
+    // cpp17_input_iterator
+    using M = std::flat_multimap<int, short>;
+    auto m  = M(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+
+    // explicit(false)
+    M m2 = {cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9)};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator)
+    // greater
+    using M = std::flat_multimap<int, short, std::greater<int>, std::deque<int, min_allocator<int>>, std::deque<short>>;
+    auto m  = M(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+    assert((m.keys() == std::deque<int, min_allocator<int>>{3, 3, 3, 2, 2, 2, 1, 1, 1}));
+    LIBCPP_ASSERT((m.values() == std::deque<short>{6, 8, 9, 4, 5, 7, 1, 2, 3}));
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator)
+    // Test when the operands are of array type (also contiguous iterator type)
+    using M = std::flat_multimap<int, short, std::greater<int>, std::vector<int, min_allocator<int>>>;
+    auto m  = M(ar, ar);
+    assert(m.empty());
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator, const key_compare&)
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, short, C, std::vector<int>, std::deque<short>>;
+    auto m  = M(ar, ar + 9, C(3));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+
+    // explicit(false)
+    M m2 = {ar, ar + 9, C(3)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(3));
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator, const Allocator&)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(ar, ar + 9, A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    M m      = {ar, ar + 9, A1(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator, const key_compare&, const Allocator&)
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(ar, ar + 9, C(3), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(InputIterator , InputIterator, const key_compare&, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::deque<int, A1>, std::vector<short, A2>>;
+    M m      = {ar, ar + 9, {}, A2(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter_stability.pass.cpp
new file mode 100644
index 00000000000000..a1539c3c831657
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/iter_iter_stability.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class InputIterator>
+//   flat_multimap(InputIterator first, InputIterator last, const key_compare& comp = key_compare())
+//
+// libc++ uses stable_sort to ensure that flat_multimap's behavior matches map's,
+// in terms of which duplicate items are kept.
+// This tests a conforming extension.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <flat_map>
+#include <random>
+#include <map>
+#include <vector>
+
+#include "test_macros.h"
+
+struct Mod256 {
+  bool operator()(int x, int y) const { return (x % 256) < (y % 256); }
+};
+
+int main(int, char**) {
+  std::mt19937 randomness;
+  std::pair<uint16_t, uint16_t> pairs[200];
+  for (auto& pair : pairs) {
+    pair = {uint16_t(randomness()), uint16_t(randomness())};
+  }
+
+  {
+    std::multimap<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200);
+    std::flat_multimap<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200);
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::multimap<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, std::allocator<int>());
+    std::flat_multimap<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::multimap<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, Mod256());
+    std::flat_multimap<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, Mod256());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::multimap<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, Mod256(), std::allocator<int>());
+    std::flat_multimap<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, Mod256(), std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move.pass.cpp
new file mode 100644
index 00000000000000..893c9247959d66
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move.pass.cpp
@@ -0,0 +1,89 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(flat_multimap&&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+int main(int, char**) {
+  {
+    using C = test_less<int>;
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    M mo    = M({{1, 1}, {1, 2}, {3, 1}}, C(5), A(7));
+    M m     = std::move(mo);
+    assert((m == M{{1, 1}, {1, 2}, {3, 1}}));
+    assert(m.key_comp() == C(5));
+    assert(m.keys().get_allocator() == A(7));
+    assert(m.values().get_allocator() == A(7));
+
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator().get_id() == test_alloc_base::moved_value);
+    assert(mo.values().get_allocator().get_id() == test_alloc_base::moved_value);
+  }
+  {
+    using C = test_less<int>;
+    using A = min_allocator<int>;
+    using M = std::flat_multimap<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    M mo    = M({{1, 1}, {1, 2}, {3, 1}}, C(5), A());
+    M m     = std::move(mo);
+    assert((m == M{{1, 1}, {1, 2}, {3, 1}}));
+    assert(m.key_comp() == C(5));
+    assert(m.keys().get_allocator() == A());
+    assert(m.values().get_allocator() == A());
+
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator() == A());
+    assert(mo.values().get_allocator() == A());
+  }
+  {
+    // A moved-from flat_multimap maintains its class invariant in the presence of moved-from comparators.
+    using M = std::flat_multimap<int, int, std::function<bool(int, int)>>;
+    M mo    = M({{1, 1}, {1, 2}, {3, 1}}, std::less<int>());
+    M m     = std::move(mo);
+    assert(m.size() == 3);
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));
+    assert(m.key_comp()(1, 2) == true);
+
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    LIBCPP_ASSERT(m.key_comp()(1, 2) == true);
+    LIBCPP_ASSERT(mo.empty());
+    mo.insert({{1, 1}, {1, 2}, {3, 1}}); // insert has no preconditions
+    assert(m == mo);
+  }
+  {
+    // moved-from object maintains invariant if one of underlying container does not clear after move
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 1, 3}, {1, 2, 3});
+    M m2    = std::move(m1);
+    assert(m2.size() == 3);
+    check_invariant(m1);
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_alloc.pass.cpp
new file mode 100644
index 00000000000000..a0259e805ac5ac
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_alloc.pass.cpp
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(flat_multimap&&, const allocator_type&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, M1&&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, M1&&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, M2&&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, M3&&, const A2&>);
+  }
+  {
+    std::pair<int, int> expected[] = {{1, 1}, {1, 2}, {2, 3}, {2, 2}, {3, 1}};
+    using C                        = test_less<int>;
+    using A                        = test_allocator<int>;
+    using M                        = std::flat_multimap<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    auto mo                        = M(expected, expected + 5, C(5), A(7));
+    auto m                         = M(std::move(mo), A(3));
+
+    assert(m.key_comp() == C(5));
+    assert(m.size() == 5);
+    auto [keys, values] = std::move(m).extract();
+    assert(keys.get_allocator() == A(3));
+    assert(values.get_allocator() == A(3));
+    assert(std::ranges::equal(keys, expected | std::views::elements<0>));
+    assert(std::ranges::equal(values, expected | std::views::elements<1>));
+
+    // The original flat_multimap is moved-from.
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator() == A(7));
+    assert(mo.values().get_allocator() == A(7));
+  }
+  {
+    // moved-from object maintains invariant if one of underlying container does not clear after move
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 1, 3}, {1, 2, 3});
+    M m2(std::move(m1), std::allocator<int>{});
+    assert(m2.size() == 3);
+    check_invariant(m1);
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign.pass.cpp
new file mode 100644
index 00000000000000..38200d008c78a3
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign.pass.cpp
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(flat_multimap&&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+int main(int, char**) {
+  {
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<char>;
+    using M  = std::flat_multimap<int, char, C, std::vector<int, A1>, std::vector<char, A2>>;
+    M mo     = M({{1, 1}, {1, 3}, {3, 2}}, C(5), A1(7));
+    M m      = M({}, C(3), A1(7));
+    m        = std::move(mo);
+    assert((m == M{{1, 1}, {1, 3}, {3, 2}}));
+    assert(m.key_comp() == C(5));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A1(7));
+    assert(vs.get_allocator() == A2(7));
+    assert(mo.empty());
+  }
+  {
+    using C  = test_less<int>;
+    using A1 = other_allocator<int>;
+    using A2 = other_allocator<char>;
+    using M  = std::flat_multimap<int, char, C, std::deque<int, A1>, std::deque<char, A2>>;
+    M mo     = M({{4, 5}, {4, 4}}, C(5), A1(7));
+    M m      = M({{1, 1}, {1, 2}, {1, 3}, {4, 4}}, C(3), A1(7));
+    m        = std::move(mo);
+    assert((m == M{{4, 5}, {4, 4}}));
+    assert(m.key_comp() == C(5));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A1(7));
+    assert(vs.get_allocator() == A2(7));
+    assert(mo.empty());
+  }
+  {
+    using A = min_allocator<int>;
+    using M = std::flat_multimap<int, int, std::greater<int>, std::vector<int, A>, std::vector<int, A>>;
+    M mo    = M({{5, 1}, {5, 2}, {3, 3}}, A());
+    M m     = M({{4, 4}, {4, 3}, {4, 2}, {1, 1}}, A());
+    m       = std::move(mo);
+    assert((m == M{{5, 1}, {5, 2}, {3, 3}}));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A());
+    assert(vs.get_allocator() == A());
+    assert(mo.empty());
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_clears.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_clears.pass.cpp
new file mode 100644
index 00000000000000..bc65dca32899cf
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_clears.pass.cpp
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(flat_multimap&&);
+// Preserves the class invariant for the moved-from flat_multimap.
+
+#include <algorithm>
+#include <cassert>
+#include <compare>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+struct MoveNegates {
+  int value_    = 0;
+  MoveNegates() = default;
+  MoveNegates(int v) : value_(v) {}
+  MoveNegates(MoveNegates&& rhs) : value_(rhs.value_) { rhs.value_ = -rhs.value_; }
+  MoveNegates& operator=(MoveNegates&& rhs) {
+    value_     = rhs.value_;
+    rhs.value_ = -rhs.value_;
+    return *this;
+  }
+  ~MoveNegates()                             = default;
+  auto operator<=>(const MoveNegates&) const = default;
+};
+
+struct MoveClears {
+  int value_   = 0;
+  MoveClears() = default;
+  MoveClears(int v) : value_(v) {}
+  MoveClears(MoveClears&& rhs) : value_(rhs.value_) { rhs.value_ = 0; }
+  MoveClears& operator=(MoveClears&& rhs) {
+    value_     = rhs.value_;
+    rhs.value_ = 0;
+    return *this;
+  }
+  ~MoveClears()                             = default;
+  auto operator<=>(const MoveClears&) const = default;
+};
+
+int main(int, char**) {
+  {
+    const std::pair<int, int> expected[] = {{1, 1}, {1, 2}, {3, 3}, {3, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+    using M = std::flat_multimap<MoveNegates, int, std::less<MoveNegates>, std::vector<MoveNegates>>;
+    M m     = M(expected, expected + 8);
+    M m2    = M(expected, expected + 3);
+
+    m2 = std::move(m);
+
+    assert(std::equal(m2.begin(), m2.end(), expected, expected + 8));
+    LIBCPP_ASSERT(m.empty());
+    check_invariant(m);
+    m.insert({1, 1});
+    m.insert({2, 2});
+    assert(m.contains(1));
+    assert(m.find(2) != m.end());
+  }
+  {
+    const std::pair<int, int> expected[] = {{1, 1}, {1, 2}, {3, 3}, {4, 4}, {5, 5}, {5, 6}, {7, 7}, {8, 8}};
+    using M = std::flat_multimap<MoveClears, int, std::less<MoveClears>, std::vector<MoveClears>>;
+    M m     = M(expected, expected + 8);
+    M m2    = M(expected, expected + 3);
+
+    m2 = std::move(m);
+
+    assert(std::equal(m2.begin(), m2.end(), expected, expected + 8));
+    LIBCPP_ASSERT(m.empty());
+    check_invariant(m);
+    m.insert({1, 1});
+    m.insert({2, 2});
+    assert(m.contains(1));
+    assert(m.find(2) != m.end());
+  }
+  {
+    // moved-from object maintains invariant if one of underlying container does not clear after move
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 1, 3}, {1, 2, 3});
+    M m2    = M({1, 1}, {1, 2});
+    m2      = std::move(m1);
+    assert(m2.size() == 3);
+    check_invariant(m1);
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_noexcept.pass.cpp
new file mode 100644
index 00000000000000..4eb58313f6f726
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_assign_noexcept.pass.cpp
@@ -0,0 +1,110 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap& operator=(flat_multimap&& c)
+//     noexcept(
+//          is_nothrow_move_assignable<key_container_type>::value &&
+//          is_nothrow_move_assignable<mapped_container_type>::value &&
+//          is_nothrow_copy_assignable<key_compare>::value);
+
+// This tests a conforming extension
+
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <type_traits>
+#include <vector>
+
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_macros.h"
+
+struct MoveSensitiveComp {
+  MoveSensitiveComp() noexcept(false)                         = default;
+  MoveSensitiveComp(const MoveSensitiveComp&) noexcept(false) = default;
+  MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; }
+  MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept = default;
+  MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) {
+    rhs.is_moved_from_ = true;
+    return *this;
+  }
+  bool operator()(const auto&, const auto&) const { return false; }
+  bool is_moved_from_ = false;
+};
+
+struct MoveThrowsComp {
+  MoveThrowsComp(MoveThrowsComp&&) noexcept(false);
+  MoveThrowsComp(const MoveThrowsComp&) noexcept(true);
+  MoveThrowsComp& operator=(MoveThrowsComp&&) noexcept(false);
+  MoveThrowsComp& operator=(const MoveThrowsComp&) noexcept(true);
+  bool operator()(const auto&, const auto&) const;
+};
+
+int main(int, char**) {
+  {
+    using C = std::flat_multimap<int, int>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    using C =
+        std::flat_multimap<MoveOnly,
+                           int,
+                           std::less<MoveOnly>,
+                           std::vector<MoveOnly, test_allocator<MoveOnly>>,
+                           std::vector<int, test_allocator<int>>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    using C =
+        std::flat_multimap<int,
+                           MoveOnly,
+                           std::less<int>,
+                           std::vector<int, test_allocator<int>>,
+                           std::vector<MoveOnly, test_allocator<MoveOnly>>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    using C =
+        std::flat_multimap<MoveOnly,
+                           int,
+                           std::less<MoveOnly>,
+                           std::vector<MoveOnly, other_allocator<MoveOnly>>,
+                           std::vector<int, other_allocator<int>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    using C =
+        std::flat_multimap<int,
+                           MoveOnly,
+                           std::less<int>,
+                           std::vector<int, other_allocator<int>>,
+                           std::vector<MoveOnly, other_allocator<MoveOnly>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a comparator that throws on move-assignment.
+    using C = std::flat_multimap<int, int, MoveThrowsComp>;
+    LIBCPP_STATIC_ASSERT(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a container that throws on move-assignment.
+    using C = std::flat_multimap<int, int, std::less<int>, std::pmr::vector<int>, std::vector<int>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a container that throws on move-assignment.
+    using C = std::flat_multimap<int, int, std::less<int>, std::vector<int>, std::pmr::vector<int>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_exceptions.pass.cpp
new file mode 100644
index 00000000000000..c2085e32be5326
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_exceptions.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// flat_multimap(flat_multimap&& s);
+// If any member function in [flat.multimap.defn] exits via an exception, the invariant is restored.
+
+#include <algorithm>
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+static int countdown = 0;
+
+struct EvilContainer : std::vector<int> {
+  EvilContainer() = default;
+  EvilContainer(EvilContainer&& rhs) {
+    // Throw on move-construction.
+    if (--countdown == 0) {
+      rhs.insert(rhs.end(), 0);
+      rhs.insert(rhs.end(), 0);
+      throw 42;
+    }
+  }
+};
+
+int main(int, char**) {
+  {
+    using M   = std::flat_multimap<int, int, std::less<int>, EvilContainer, std::vector<int>>;
+    M mo      = {{1, 1}, {1, 2}, {3, 3}};
+    countdown = 1;
+    try {
+      M m = std::move(mo);
+      assert(false); // not reached
+    } catch (int x) {
+      assert(x == 42);
+    }
+    // The source flat_multimap maintains its class invariant.
+    check_invariant(mo);
+    LIBCPP_ASSERT(mo.empty());
+  }
+  {
+    using M   = std::flat_multimap<int, int, std::less<int>, std::vector<int>, EvilContainer>;
+    M mo      = {{1, 1}, {1, 2}, {3, 3}};
+    countdown = 1;
+    try {
+      M m = std::move(mo);
+      assert(false); // not reached
+    } catch (int x) {
+      assert(x == 42);
+    }
+    // The source flat_multimap maintains its class invariant.
+    check_invariant(mo);
+    LIBCPP_ASSERT(mo.empty());
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_noexcept.pass.cpp
new file mode 100644
index 00000000000000..e038902e26d52a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/move_noexcept.pass.cpp
@@ -0,0 +1,104 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(flat_multimap&&)
+//        noexcept(is_nothrow_move_constructible<key_container_type>::value &&
+//                 is_nothrow_move_constructible<mapped_container_type>::value &&
+//                 is_nothrow_copy_constructible<key_compare>::value);
+
+// This tests a conforming extension
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+template <class T>
+struct ThrowingMoveAllocator {
+  using value_type                                    = T;
+  explicit ThrowingMoveAllocator()                    = default;
+  ThrowingMoveAllocator(const ThrowingMoveAllocator&) = default;
+  ThrowingMoveAllocator(ThrowingMoveAllocator&&) noexcept(false) {}
+  T* allocate(std::ptrdiff_t n) { return std::allocator<T>().allocate(n); }
+  void deallocate(T* p, std::ptrdiff_t n) { return std::allocator<T>().deallocate(p, n); }
+  friend bool operator==(ThrowingMoveAllocator, ThrowingMoveAllocator) = default;
+};
+
+struct ThrowingMoveComp {
+  ThrowingMoveComp() = default;
+  ThrowingMoveComp(const ThrowingMoveComp&) noexcept(true) {}
+  ThrowingMoveComp(ThrowingMoveComp&&) noexcept(false) {}
+  bool operator()(const auto&, const auto&) const { return false; }
+};
+
+struct MoveSensitiveComp {
+  MoveSensitiveComp() noexcept(false)                  = default;
+  MoveSensitiveComp(const MoveSensitiveComp&) noexcept = default;
+  MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; }
+  MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept(false) = default;
+  MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) {
+    rhs.is_moved_from_ = true;
+    return *this;
+  }
+  bool operator()(const auto&, const auto&) const { return false; }
+  bool is_moved_from_ = false;
+};
+
+int main(int, char**) {
+  {
+    using C = std::flat_multimap<int, int>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  {
+    using C = std::flat_multimap<int, int, std::less<int>, std::deque<int, test_allocator<int>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+#if _LIBCPP_VERSION
+  {
+    // Container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-copyable allocators
+    using C =
+        std::flat_multimap<int, int, std::less<int>, std::deque<int, ThrowingMoveAllocator<int>>, std::vector<int>>;
+    static_assert(!std::is_nothrow_move_constructible_v<std::deque<int, ThrowingMoveAllocator<int>>>);
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  {
+    // Container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-copyable allocators
+    using C =
+        std::flat_multimap<int, int, std::less<int>, std::vector<int>, std::deque<int, ThrowingMoveAllocator<int>>>;
+    static_assert(!std::is_nothrow_move_constructible_v<std::deque<int, ThrowingMoveAllocator<int>>>);
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+#endif // _LIBCPP_VERSION
+  {
+    // Comparator fails to be nothrow-move-constructible
+    using C = std::flat_multimap<int, int, ThrowingMoveComp>;
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/pmr.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/pmr.pass.cpp
new file mode 100644
index 00000000000000..8b518f6afbda9c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/pmr.pass.cpp
@@ -0,0 +1,361 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: availability-pmr-missing
+
+// <flat_map>
+
+// Test various constructors with pmr
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <ranges>
+#include <vector>
+#include <string>
+
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // flat_multimap(const Allocator& a);
+    using M = std::flat_multimap<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::polymorphic_allocator<int> pa = &mr;
+    auto m1                                 = M(pa);
+    assert(m1.empty());
+    assert(m1.keys().get_allocator() == pa);
+    assert(m1.values().get_allocator() == pa);
+    auto m2 = M(&mr);
+    assert(m2.empty());
+    assert(m2.keys().get_allocator() == pa);
+    assert(m2.values().get_allocator() == pa);
+  }
+  {
+    // flat_multimap(const key_compare& comp, const Alloc& a);
+    using M = std::flat_multimap<int, int, std::function<bool(int, int)>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    vm.emplace_back(std::greater<int>());
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp()(2, 1) == true);
+    assert(vm[0].value_comp()({2, 0}, {1, 0}) == true);
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+    //          const Allocator& a);
+    using M = std::flat_multimap<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::pmr::vector<int> vs = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    assert(ks.get_allocator().resource() != &mr);
+    assert(vs.get_allocator().resource() != &mr);
+    vm.emplace_back(ks, vs);
+    assert(ks.size() == 9); // ks' value is unchanged, since it was an lvalue above
+    assert(vs.size() == 9); // vs' value is unchanged, since it was an lvalue above
+    assert((vm[0] == M{{1, 1}, {1, 1}, {1, 1}, {2, 2}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {3, 3}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(const flat_multimap&, const allocator_type&);
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({1, 2, 1}, {2, 2, 1}, C(5), &mr1);
+    M m  = {mo, &mr2}; // also test the implicitness of this constructor
+
+    assert(m.key_comp() == C(5));
+    assert((m.keys() == std::pmr::vector<int>{1, 1, 2}));
+    assert((m.values() == std::pmr::vector<int>{2, 1, 2}));
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert((mo.keys() == std::pmr::vector<int>{1, 1, 2}));
+    assert((mo.values() == std::pmr::vector<int>{2, 1, 2}));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+    assert(mo.values().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_multimap(const flat_multimap&, const allocator_type&);
+    using M = std::flat_multimap<int, int, std::less<>, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::vector<M> vs;
+    M m = {{1, 2}, {1, 2}, {3, 1}};
+    vs.push_back(m);
+    assert(vs[0] == m);
+  }
+  {
+    // flat_multimap& operator=(const flat_multimap& m);
+    // pmr allocator is not propagated
+    using M = std::flat_multimap<int, int, std::less<>, std::pmr::deque<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{1, 1}, {1, 2}, {3, 3}}, &mr1);
+    M m  = M({{4, 4}, {4, 5}}, &mr2);
+    m    = mo;
+    assert((m == M{{1, 1}, {1, 2}, {3, 3}}));
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+
+    // mo is unchanged
+    assert((mo == M{{1, 1}, {1, 2}, {3, 3}}));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_multimap(const flat_multimap& m);
+    using C = test_less<int>;
+    std::pmr::monotonic_buffer_resource mr;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    auto mo = M({{1, 1}, {1, 2}, {3, 3}}, C(5), &mr);
+    auto m  = mo;
+
+    assert(m.key_comp() == C(5));
+    assert((m == M{{1, 1}, {1, 2}, {3, 3}}));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator().resource() == std::pmr::get_default_resource());
+    assert(vs.get_allocator().resource() == std::pmr::get_default_resource());
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert((mo == M{{1, 1}, {1, 2}, {3, 3}}));
+    auto [kso, vso] = std::move(mo).extract();
+    assert(kso.get_allocator().resource() == &mr);
+    assert(vso.get_allocator().resource() == &mr);
+  }
+  {
+    //  flat_multimap(initializer_list<value_type> il, const Alloc& a);
+    using M = std::flat_multimap<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::initializer_list<M::value_type> il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    vm.emplace_back(il);
+    assert((vm[0] == M{{1, 1}, {1, 1}, {3, 3}, {4, 4}, {5, 5}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    //  flat_multimap(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::initializer_list<M::value_type> il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    vm.emplace_back(il, C(5));
+    assert((vm[0] == M{{1, 1}, {1, 1}, {3, 3}, {4, 4}, {5, 5}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+    assert(vm[0].key_comp() == C(5));
+  }
+  {
+    // flat_multimap(InputIterator first, InputIterator last, const Allocator& a);
+    using P      = std::pair<int, short>;
+    P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+    P expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+    {
+      //  cpp17 iterator
+      using M = std::flat_multimap<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+      assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>));
+      LIBCPP_ASSERT(std::ranges::equal(vm[0], expected));
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+    {
+      using M = std::flat_multimap<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(ar, ar);
+      assert(vm[0].empty());
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+  }
+  {
+    // flat_multimap(flat_multimap&&, const allocator_type&);
+    std::pair<int, int> expected[] = {{1, 1}, {1, 1}, {2, 2}, {3, 1}};
+    using C                        = test_less<int>;
+    using M                        = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{1, 1}, {3, 1}, {1, 1}, {2, 2}}, C(5), &mr1);
+    M m  = {std::move(mo), &mr2}; // also test the implicitness of this constructor
+
+    assert(m.key_comp() == C(5));
+    assert(m.size() == 4);
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+    assert(std::ranges::equal(m, expected));
+
+    // The original flat_multimap is moved-from.
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+    assert(mo.values().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_multimap(flat_multimap&&, const allocator_type&);
+    using M = std::flat_multimap<int, int, std::less<>, std::pmr::deque<int>, std::pmr::vector<int>>;
+    std::pmr::vector<M> vs;
+    M m = {{1, 1}, {3, 1}, {1, 1}, {2, 2}};
+    vs.push_back(std::move(m));
+    assert((vs[0].keys() == std::pmr::deque<int>{1, 1, 2, 3}));
+    assert((vs[0].values() == std::pmr::vector<int>{1, 1, 2, 1}));
+  }
+  {
+    // flat_multimap& operator=(flat_multimap&&);
+    using M = std::
+        flat_multimap<std::pmr::string, int, std::less<>, std::pmr::vector<std::pmr::string>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{"short", 1},
+              {"very long string that definitely won't fit in the SSO buffer and therefore becomes empty on move", 2}},
+             &mr1);
+    M m  = M({{"don't care", 3}}, &mr2);
+    m    = std::move(mo);
+    assert(m.size() == 2);
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));
+    assert(m.begin()->first.get_allocator().resource() == &mr2);
+
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    mo.insert({"foo", 1});
+    assert(mo.begin()->first.get_allocator().resource() == &mr1);
+  }
+  {
+    //  flat_multimap(from_range_t, R&&, const Alloc&);
+    using P      = std::pair<int, short>;
+    P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+    P expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}};
+    {
+      // input_range
+      using M    = std::flat_multimap<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      using Iter = cpp20_input_iterator<const P*>;
+      using Sent = sentinel_wrapper<Iter>;
+      using R    = std::ranges::subrange<Iter, Sent>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+      assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>));
+      LIBCPP_ASSERT(std::ranges::equal(vm[0], expected));
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+    {
+      using M = std::flat_multimap<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      using R = std::ranges::subrange<const P*>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(std::from_range, R(ar, ar));
+      assert(vm[0].empty());
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+    //          const mapped_container_type& mapped_cont, const Alloc& a);
+    using M = std::flat_multimap<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks = {1, 1, 4, 10};
+    std::pmr::vector<int> vs = {4, 3, 2, 1};
+    vm.emplace_back(std::sorted_equivalent, ks, vs);
+    assert(!ks.empty()); // it was an lvalue above
+    assert(!vs.empty()); // it was an lvalue above
+    assert((vm[0] == M{{1, 4}, {1, 3}, {4, 2}, {10, 1}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+    //          const mapped_container_type& mapped_cont, const Alloc& a);
+    using M = std::flat_multimap<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks({1, 1, 4, 10}, &mr);
+    std::pmr::vector<int> vs({4, 3, 2, 1}, &mr);
+    vm.emplace_back(std::sorted_equivalent, ks, vs);
+    assert((vm[0] == M{{1, 4}, {1, 3}, {4, 2}, {10, 1}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    // cpp_17
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {1, 2}, {1, 4}, {5, 5}};
+    vm.emplace_back(
+        std::sorted_equivalent, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4), C(3));
+    assert((vm[0] == M{{1, 1}, {1, 2}, {1, 4}, {5, 5}}));
+    assert(vm[0].key_comp() == C(3));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pair<int, int> ar[1] = {{42, 42}};
+    vm.emplace_back(std::sorted_equivalent, ar, ar, C(4));
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp() == C(4));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    // cpp_17
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {1, 2}, {1, 4}, {5, 5}};
+    vm.emplace_back(
+        std::sorted_equivalent, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4), C(3));
+    assert((vm[0] == M{{1, 1}, {1, 2}, {1, 4}, {5, 5}}));
+    assert(vm[0].key_comp() == C(3));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pair<int, int> ar[1] = {{42, 42}};
+    vm.emplace_back(std::sorted_equivalent, ar, ar, C(4));
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp() == C(4));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/range.pass.cpp
new file mode 100644
index 00000000000000..de750e2506341f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/range.pass.cpp
@@ -0,0 +1,227 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<container-compatible-range<value_type> R>
+//     flat_multimap(from_range_t, R&&)
+// template<container-compatible-range<value_type> R>
+//     flat_multimap(from_range_t, R&&, const key_compare&)
+// template<container-compatible-range<value_type> R, class Alloc>
+//      flat_multimap(from_range_t, R&&, const Alloc&);
+// template<container-compatible-range<value_type> R, class Alloc>
+//      flat_multimap(from_range_t, R&&, const key_compare&, const Alloc&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+// test constraint container-compatible-range
+
+template <class V>
+using RangeOf = std::ranges::subrange<V*>; // minimal contiguous range of V, used to probe the range constructors
+using Map     = std::flat_multimap<int, double>;
+
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>>);  // exact value_type
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>>); // convertible pair
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>>);    // non-pair element rejected
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>>); // non-pair element rejected
+
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>, std::less<int>>);
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>, std::less<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::less<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::less<int>>);
+
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>, std::allocator<int>>);
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::allocator<int>>);
+
+static_assert(std::is_constructible_v<Map,
+                                      std::from_range_t,
+                                      RangeOf<std::pair<int, double>>,
+                                      std::less<int>,
+                                      std::allocator<int>>);
+static_assert(std::is_constructible_v<Map,
+                                      std::from_range_t,
+                                      RangeOf<std::pair<short, double>>,
+                                      std::less<int>,
+                                      std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::less<int>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::less<int>, std::allocator<int>>);
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, std::from_range_t, M1, const A1&>);  // A1 fits both containers
+    static_assert(!std::is_constructible_v<M1, std::from_range_t, M1, const A2&>); // A2 fits neither container
+    static_assert(!std::is_constructible_v<M2, std::from_range_t, M2, const A2&>); // A2 fits only the mapped container
+    static_assert(!std::is_constructible_v<M3, std::from_range_t, M3, const A2&>); // A2 fits only the key container
+
+    static_assert(std::is_constructible_v<M1, std::from_range_t, M1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::from_range_t, M1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::from_range_t, M2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::from_range_t, M3, const C&, const A2&>);
+  }
+  {
+    // container-compatible-range
+    using C           = test_less<int>;
+    using A1          = test_allocator<int>;
+    using A2          = test_allocator<std::string>;
+    using M           = std::flat_multimap<int, std::string, C, std::vector<int, A1>, std::vector<std::string, A2>>;
+    using Pair        = std::pair<int, std::string>;
+    using PairLike    = std::tuple<int, std::string>;
+    using NonPairLike = int;
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&>); // tuple<K, V> is pair-like
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const C&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const C&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const C&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const A1&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const A1&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const A1&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const C&, const A1&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const C&, const A1&>);
+  }
+
+  using P      = std::pair<int, short>;
+  P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}}; // unsorted, duplicate keys
+  P expected[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {2, 7}, {3, 6}, {3, 8}, {3, 9}}; // stably sorted by key
+  {
+    // flat_multimap(from_range_t, R&&)
+    // input_range && !common
+    using M    = std::flat_multimap<int, short>;
+    using Iter = cpp20_input_iterator<const P*>;
+    using Sent = sentinel_wrapper<Iter>;
+    using R    = std::ranges::subrange<Iter, Sent>;
+    auto m     = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected)); // exact value order among equal keys is a libc++ detail
+
+    // explicit(false)
+    M m2 = {std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(from_range_t, R&&)
+    // greater
+    using M = std::flat_multimap<int, short, std::greater<int>, std::deque<int, min_allocator<int>>, std::deque<short>>;
+    using Iter = cpp20_input_iterator<const P*>;
+    using Sent = sentinel_wrapper<Iter>;
+    using R    = std::ranges::subrange<Iter, Sent>;
+    auto m     = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+    assert((m.keys() == std::deque<int, min_allocator<int>>{3, 3, 3, 2, 2, 2, 1, 1, 1})); // descending under greater
+    LIBCPP_ASSERT((m.values() == std::deque<short>{6, 8, 9, 4, 5, 7, 1, 2, 3}));
+  }
+  {
+    // flat_multimap(from_range_t, R&&)
+    // contiguous range
+    using M = std::flat_multimap<int, short>;
+    using R = std::ranges::subrange<const P*>;
+    auto m  = M(std::from_range, R(ar, ar + 9));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+  }
+  {
+    // flat_multimap(from_range_t, R&&, const key_compare&)
+    using C = test_less<int>;
+    using M = std::flat_multimap<int, short, C, std::vector<int>, std::deque<short>>;
+    using R = std::ranges::subrange<const P*>;
+    auto m  = M(std::from_range, R(ar, ar + 9), C(3));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3)); // comparator is copied into the adaptor
+
+    // explicit(false)
+    M m2 = {std::from_range, R(ar, ar + 9), C(3)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(3));
+  }
+  {
+    // flat_multimap(from_range_t, R&&, const Allocator&)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    auto m   = M(std::from_range, R(ar, ar + 9), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));   // allocator propagated to the key container
+    assert(m.values().get_allocator() == A2(5)); // rebound copy propagated to the mapped container
+  }
+  {
+    // flat_multimap(from_range_t, R&&, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    M m      = {std::from_range, R(ar, ar + 9), A1(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(from_range_t, R&&, const key_compare&, const Allocator&)
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    auto m   = M(std::from_range, R(ar, ar + 9), C(3), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(from_range_t, R&&, const key_compare&, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, std::less<int>, std::deque<int, A1>, std::vector<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    M m      = {std::from_range, R(ar, ar + 9), {}, A2(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5)); // rebinding preserves the test_allocator id
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_container.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_container.pass.cpp
new file mode 100644
index 00000000000000..16579f0deed5d1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_container.pass.cpp
@@ -0,0 +1,165 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap(sorted_equivalent_t, key_container_type key_cont, mapped_container_type mapped_cont,
+//          const key_compare& comp = key_compare());
+//
+// template<class Alloc>
+//   flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+//            const mapped_container_type& mapped_cont, const Alloc& a);
+// template<class Alloc>
+//   flat_multimap(sorted_equivalent_t, const key_container_type& key_cont,
+//            const mapped_container_type& mapped_cont,
+//            const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, const V1&, const V1&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, const V1&, const V1&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, const V1&, const V2&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, const V2&, const V1&, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, const V1&, const V1&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, const V1&, const V1&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, const V1&, const V2&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, const V2&, const V1&, const C&, const A2&>);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type)
+    using M              = std::flat_multimap<int, char>;
+    std::vector<int> ks  = {1, 4, 4, 10}; // already sorted, with an equivalent-key run
+    std::vector<char> vs = {4, 3, 2, 1};
+    auto ks2             = ks;
+    auto vs2             = vs;
+
+    auto m = M(std::sorted_equivalent, ks, vs);
+    assert((m == M{{1, 4}, {4, 3}, {4, 2}, {10, 1}})); // input element order is preserved, no re-sort
+    m = M(std::sorted_equivalent, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 4}, {4, 3}, {4, 2}, {10, 1}}));
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, std::move(ks2), std::move(vs2)};
+    assert(m == m2);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type)
+    // non-default container, comparator and allocator type
+    using Ks = std::deque<int, min_allocator<int>>;
+    using Vs = std::deque<char, min_allocator<char>>;
+    using M  = std::flat_multimap<int, char, std::greater<int>, Ks, Vs>;
+    Ks ks    = {10, 1, 1, 1}; // sorted w.r.t. greater<int>
+    Vs vs    = {1, 2, 3, 4};
+    auto m   = M(std::sorted_equivalent, ks, vs);
+    assert((m == M{{1, 2}, {1, 3}, {1, 4}, {10, 1}}));
+    m = M(std::sorted_equivalent, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 2}, {1, 3}, {1, 4}, {10, 1}}));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type)
+    // allocator copied into the containers
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({2, 2, 4, 10}, A(4));
+    auto vs = std::deque<int, A>({4, 3, 2, 1}, A(5));
+    auto m  = M(std::sorted_equivalent, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{2, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.keys().get_allocator() == A(4));   // moved containers keep their own allocators
+    assert(m.values().get_allocator() == A(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type, key_compare)
+    using C              = test_less<int>;
+    using M              = std::flat_multimap<int, char, C>;
+    std::vector<int> ks  = {1, 2, 10, 10};
+    std::vector<char> vs = {4, 3, 2, 1};
+
+    auto m = M(std::sorted_equivalent, ks, vs, C(4));
+    assert((m == M{{1, 4}, {2, 3}, {10, 2}, {10, 1}}));
+    assert(m.key_comp() == C(4));
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, ks, vs, C(4)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type, key_compare, const Allocator&)
+    using C                = test_less<int>;
+    using A                = test_allocator<int>;
+    using M                = std::flat_multimap<int, int, C, std::vector<int, A>, std::vector<int, A>>;
+    std::vector<int, A> ks = {1, 2, 4, 10};
+    std::vector<int, A> vs = {4, 3, 2, 1};
+    auto m                 = M(std::sorted_equivalent, ks, vs, C(4), A(5));
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A(5));
+    assert(m.values().get_allocator() == A(5));
+
+    // explicit(false): must select the sorted_equivalent overload under test
+    M m2 = {std::sorted_equivalent, ks, vs, C(4), A(5)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+    assert(m2.keys().get_allocator() == A(5));
+    assert(m2.values().get_allocator() == A(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, key_container_type , mapped_container_type, const Allocator&)
+    using A = test_allocator<int>;
+    using M = std::flat_multimap<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 2, 4, 4}, A(4));
+    auto vs = std::deque<int, A>({4, 3, 2, 1}, A(5));
+    auto m  = M(std::sorted_equivalent, ks, vs, A(6)); // replaces the allocators
+    assert(!ks.empty());                               // it was an lvalue above
+    assert(!vs.empty());                               // it was an lvalue above
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {4, 1}}));
+    assert(m.keys().get_allocator() == A(6));
+    assert(m.values().get_allocator() == A(6));
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, ks, vs, A(6)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A(6));
+    assert(m2.values().get_allocator() == A(6));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_initializer_list.pass.cpp
new file mode 100644
index 00000000000000..b34313bb3d4043
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_initializer_list.pass.cpp
@@ -0,0 +1,183 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   flat_multimap(sorted_equivalent_t s, initializer_list<value_type> il,
+//            const key_compare& comp = key_compare())
+// template<class Alloc>
+//   flat_multimap(sorted_equivalent_t, initializer_list<value_type> il, const Alloc& a);
+// template<class Alloc>
+//   flat_multimap(sorted_equivalent_t, initializer_list<value_type> il,
+//            const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+template <class T, class U>
+std::initializer_list<std::pair<T, U>> il = {{1, 1}, {4, 2}, {4, 4}, {5, 5}}; // sorted, with a duplicate key (4); backing array has static storage duration here
+
+const auto il1 = il<int, int>;   // exact value_type
+const auto il2 = il<int, short>; // narrower mapped type
+const auto il3 = il<short, int>; // narrower key type
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_multimap<int, int, C, V1, V1>;
+    using M2 = std::flat_multimap<int, int, C, V1, V2>;
+    using M3 = std::flat_multimap<int, int, C, V2, V1>;
+    using IL = std::initializer_list<std::pair<int, int>>;
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, IL, const A1&>);  // A1 fits both containers
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, IL, const A2&>); // A2 fits neither container
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, IL, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, IL, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, IL, const C&, const A2&>);
+  }
+  {
+    // initializer_list<value_type> needs to match exactly
+    using M = std::flat_multimap<int, short>;
+    using C = typename M::key_compare;
+    static_assert(std::is_constructible_v<M, std::sorted_equivalent_t, std::initializer_list<std::pair<int, short>>>);
+    static_assert(
+        std::is_constructible_v<M, std::sorted_equivalent_t, std::initializer_list<std::pair<int, short>>, C>);
+    static_assert(std::is_constructible_v<M,
+                                          std::sorted_equivalent_t,
+                                          std::initializer_list<std::pair<int, short>>,
+                                          C,
+                                          std::allocator<int>>);
+    static_assert(std::is_constructible_v<M,
+                                          std::sorted_equivalent_t,
+                                          std::initializer_list<std::pair<int, short>>,
+                                          std::allocator<int>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_equivalent_t, std::initializer_list<std::pair<const int, short>>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_equivalent_t, std::initializer_list<std::pair<const int, short>>, C>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_equivalent_t,
+                                           std::initializer_list<std::pair<const int, short>>,
+                                           C,
+                                           std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_equivalent_t,
+                                           std::initializer_list<std::pair<const int, short>>,
+                                           std::allocator<int>>);
+    static_assert(
+        !std::
+            is_constructible_v<M, std::sorted_equivalent_t, std::initializer_list<std::pair<const int, const short>>>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_equivalent_t,
+                                           std::initializer_list<std::pair<const int, const short>>,
+                                           C>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_equivalent_t,
+                                           std::initializer_list<std::pair<const int, const short>>,
+                                           C,
+                                           std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_equivalent_t,
+                                           std::initializer_list<std::pair<const int, const short>>,
+                                           std::allocator<int>>);
+  }
+
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>);
+    using M       = std::flat_multimap<int, int>;
+    auto m        = M(std::sorted_equivalent, il1); // il1 is already sorted; duplicate key 4 kept in order
+    auto expected = M{{1, 1}, {4, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, il1};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>, const key_compare&);
+    using M = std::flat_multimap<int, int, std::function<bool(int, int)>>;
+    auto m  = M(std::sorted_equivalent, il1, std::less<int>());
+    assert(m == M({{1, 1}, {4, 2}, {4, 4}, {5, 5}}, std::less<>()));
+    assert(m.key_comp()(1, 2) == true); // the passed-in comparator was stored
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, il1, std::less<int>()};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>, const key_compare&);
+    // greater
+    using M = std::flat_multimap<int, int, std::greater<int>, std::deque<int, min_allocator<int>>, std::vector<int>>;
+    std::initializer_list<std::pair<int, int>> il4{{5, 5}, {4, 4}, {1, 2}, {1, 1}}; // sorted w.r.t. greater<int>
+    auto m = M(std::sorted_equivalent, il4, std::greater<int>());
+    assert((m == M{{5, 5}, {4, 4}, {1, 2}, {1, 1}}));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>,  const Allocator&)
+    using A1      = test_allocator<int>;
+    using A2      = test_allocator<short>;
+    using M       = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m        = M(std::sorted_equivalent, il2, A1(5));
+    auto expected = M{{1, 1}, {4, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+    assert(m.keys().get_allocator() == A1(5));   // allocator propagated to the key container
+    assert(m.values().get_allocator() == A2(5)); // rebound copy propagated to the mapped container
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, il2, A1(5)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A1(5));
+    assert(m2.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>, const key_compare&, const Allocator&);
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(std::sorted_equivalent, il2, C(3), A1(5));
+    assert((m == M{{1, 1}, {4, 2}, {4, 4}, {5, 5}}));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, initializer_list<value_type>, const key_compare&, const Allocator&);
+    // explicit(false)
+    using A1 = test_allocator<short>;
+    using A2 = test_allocator<int>;
+    using M  = std::flat_multimap<short, int, std::less<int>, std::deque<short, A1>, std::vector<int, A2>>;
+    M m      = {std::sorted_equivalent, il3, {}, A1(5)}; // implicit ctor
+    assert((m == M{{1, 1}, {4, 2}, {4, 4}, {5, 5}}));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_iter_iter.pass.cpp
new file mode 100644
index 00000000000000..45c4b3dc675a5c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.cons/sorted_iter_iter.pass.cpp
@@ -0,0 +1,173 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp = key_compare());
+// template<class InputIterator, class Alloc>
+//   flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const Alloc& a);
+// template<class InputIterator, class Allocator>
+//   flat_multimap(sorted_equivalent_t, InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  // Exercises every sorted_equivalent_t iterator-pair constructor overload of
+  // flat_multimap: plain, with comparator, with allocator, and with both.
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+    using C     = test_less<int>;
+    using A1    = test_allocator<int>;
+    using A2    = other_allocator<int>;
+    using V1    = std::vector<int, A1>;
+    using V2    = std::vector<int, A2>;
+    using M1    = std::flat_multimap<int, int, C, V1, V1>;
+    using M2    = std::flat_multimap<int, int, C, V1, V2>;
+    using M3    = std::flat_multimap<int, int, C, V2, V1>;
+    using Iter1 = typename M1::iterator;
+    using Iter2 = typename M2::iterator;
+    using Iter3 = typename M3::iterator;
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, Iter1, Iter1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, Iter1, Iter1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, Iter2, Iter2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, Iter3, Iter3, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_equivalent_t, Iter1, Iter1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_equivalent_t, Iter1, Iter1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_equivalent_t, Iter2, Iter2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_equivalent_t, Iter3, Iter3, const C&, const A2&>);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator);
+    // cpp17_input_iterator
+    using M = std::flat_multimap<int, int>;
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {4, 4}, {5, 5}, {5, 2}};
+    auto m  = M(std::sorted_equivalent, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4));
+    // Duplicate keys are allowed; the two key-5 entries keep their input order.
+    auto expected = M{{1, 1}, {4, 4}, {5, 5}, {5, 2}};
+    assert(m == expected);
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4)};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator);
+    // contiguous iterator
+    using C = test_less<int>;
+    using M =
+        std::flat_multimap<int, int, C, std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>;
+    std::pair<int, int> ar[] = {{1, 1}, {1, 4}, {2, 2}, {5, 5}};
+    auto m                   = M(std::sorted_equivalent, ar, ar + 4);
+    auto expected            = M{{1, 1}, {1, 4}, {2, 2}, {5, 5}};
+    assert(m == expected);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const key_compare&);
+    // cpp17_input_iterator
+    using M = std::flat_multimap<int, int, std::function<bool(int, int)>>;
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {2, 2}, {2, 4}, {5, 5}};
+    auto m  = M(std::sorted_equivalent,
+               cpp17_input_iterator<const P*>(ar),
+               cpp17_input_iterator<const P*>(ar + 4),
+               std::less<int>());
+    assert(m == M({{1, 1}, {2, 2}, {2, 4}, {5, 5}}, std::less<>()));
+    assert(m.key_comp()(1, 2) == true);
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent,
+            cpp17_input_iterator<const P*>(ar),
+            cpp17_input_iterator<const P*>(ar + 4),
+            std::less<int>()};
+    assert(m2 == m);
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const key_compare&);
+    // greater
+    // Note: the input is sorted with respect to std::greater, i.e. descending.
+    using M = std::flat_multimap<int, int, std::greater<int>, std::deque<int, min_allocator<int>>, std::vector<int>>;
+    using P = std::pair<int, int>;
+    P ar[]  = {{5, 5}, {2, 4}, {2, 2}, {1, 1}};
+    auto m  = M(std::sorted_equivalent,
+               cpp17_input_iterator<const P*>(ar),
+               cpp17_input_iterator<const P*>(ar + 4),
+               std::greater<int>());
+    assert((m == M{{5, 5}, {2, 4}, {2, 2}, {1, 1}}));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const key_compare&);
+    // contiguous iterator
+    // Empty range: the comparator must still be stored.
+    using C = test_less<int>;
+    using M =
+        std::flat_multimap<int, int, C, std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>;
+    std::pair<int, int> ar[1] = {{42, 42}};
+    auto m                    = M(std::sorted_equivalent, ar, ar, C(5));
+    assert(m.empty());
+    assert(m.key_comp() == C(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const Allocator&)
+    using A1      = test_allocator<int>;
+    using A2      = test_allocator<short>;
+    using M       = std::flat_multimap<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using P       = std::pair<int, int>;
+    P ar[]        = {{2, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m        = M(std::sorted_equivalent, ar, ar + 4, A1(5));
+    auto expected = M{{2, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+
+    // explicit(false)
+    M m2 = {std::sorted_equivalent, ar, ar + 4, A1(5)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A1(5));
+    assert(m2.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const key_compare&, const Allocator&);
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_multimap<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    using P  = std::pair<int, int>;
+    P ar[]   = {{1, 1}, {1, 2}, {1, 4}, {1, 5}};
+    auto m   = M(std::sorted_equivalent, ar, ar + 4, C(3), A1(5));
+    assert((m == M{{1, 1}, {1, 2}, {1, 4}, {1, 5}}));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_multimap(sorted_equivalent_t, InputIterator, InputIterator, const key_compare&, const Allocator&);
+    // explicit(false)
+    using A1 = test_allocator<short>;
+    using A2 = test_allocator<int>;
+    using M  = std::flat_multimap<short, int, std::less<int>, std::deque<short, A1>, std::vector<int, A2>>;
+    using P  = std::pair<int, int>;
+    P ar[]   = {{1, 1}, {1, 2}, {1, 4}, {1, 5}};
+    M m      = {std::sorted_equivalent, ar, ar + 4, {}, A1(5)}; // implicit ctor
+    assert((m == M{{1, 1}, {1, 2}, {1, 4}, {1, 5}}));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if.pass.cpp
new file mode 100644
index 00000000000000..76d5cbd9090505
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if.pass.cpp
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class Key, class T, class Compare, class KeyContainer, class MappedContainer, class Predicate>
+//   typename flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+//   erase_if(flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <initializer_list>
+#include <vector>
+
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+// Verify that `flat_multimap` (like `multimap`) does NOT support std::erase.
+//
+// Detects whether std::erase(s, x) is a valid expression for container S.
+template <class S>
+concept HasStdErase = requires(S& s, typename S::value_type x) { std::erase(s, x); };
+static_assert(HasStdErase<std::vector<int>>);
+static_assert(!HasStdErase<std::flat_multimap<int, int>>);
+
+// Builds a multimap from `vals`, mapping each v to v + 10. Duplicate values in
+// `vals` become duplicate keys (allowed in a multimap).
+template <class M>
+M make(std::initializer_list<int> vals) {
+  M ret;
+  for (int v : vals) {
+    ret.emplace(static_cast<typename M::key_type>(v), static_cast<typename M::mapped_type>(v + 10));
+  }
+  return ret;
+}
+
+// Runs std::erase_if with predicate `p` on a multimap built from `vals` and
+// checks both the returned erased count and the surviving elements.
+template <class M, class Pred>
+void test0(
+    std::initializer_list<int> vals, Pred p, std::initializer_list<int> expected, std::size_t expected_erased_count) {
+  M s = make<M>(vals);
+  ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p)));
+  assert(expected_erased_count == std::erase_if(s, p));
+  assert(s == make<M>(expected));
+}
+
+// Exercises erase_if with predicates taking the element by const_reference,
+// by value, by const value_type&, and generically; including inputs with
+// duplicate keys, where all equivalent elements must be erased together.
+template <class S>
+void test() {
+  // Test all the plausible signatures for this predicate.
+  auto is1   = [](typename S::const_reference v) { return v.first == 1; };
+  auto is2   = [](typename S::value_type v) { return v.first == 2; };
+  auto is3   = [](const typename S::value_type& v) { return v.first == 3; };
+  auto is4   = [](auto v) { return v.first == 4; };
+  auto True  = [](const auto&) { return true; };
+  auto False = [](auto&&) { return false; };
+
+  test0<S>({}, is1, {}, 0);
+
+  test0<S>({1}, is1, {}, 1);
+  test0<S>({1, 1}, is1, {}, 2);
+  test0<S>({1, 1}, is2, {1, 1}, 0);
+
+  test0<S>({1, 2}, is1, {2}, 1);
+  test0<S>({1, 2}, is2, {1}, 1);
+  test0<S>({1, 2, 2, 2}, is2, {1}, 3);
+  test0<S>({1, 2, 2, 2}, is3, {1, 2, 2, 2}, 0);
+
+  test0<S>({1, 1, 2, 2, 3, 3}, is1, {2, 2, 3, 3}, 2);
+  test0<S>({1, 1, 2, 2, 3, 3}, is2, {1, 1, 3, 3}, 2);
+  test0<S>({1, 1, 2, 2, 3, 3}, is3, {1, 1, 2, 2}, 2);
+  test0<S>({1, 1, 2, 2, 3, 3}, is4, {1, 1, 2, 2, 3, 3}, 0);
+
+  test0<S>({1, 2, 2, 3, 3, 3}, True, {}, 6);
+  test0<S>({1, 2, 2, 3, 3, 3}, False, {1, 2, 2, 3, 3, 3}, 0);
+}
+
+int main(int, char**) {
+  // Instantiate with several key/mapped types, comparators, containers, and
+  // allocators to cover the erase_if overload broadly.
+  test<std::flat_multimap<int, char>>();
+  test<std::flat_multimap<int,
+                          char,
+                          std::less<int>,
+                          std::vector<int, min_allocator<int>>,
+                          std::vector<char, min_allocator<char>>>>();
+  test<std::flat_multimap<int, char, std::greater<int>, std::vector<int, test_allocator<int>>>>();
+  test<std::flat_multimap<int, char, std::less<int>, std::deque<int, min_allocator<int>>>>();
+  test<std::flat_multimap<int, char, std::greater<int>, std::deque<int, test_allocator<int>>>>();
+  test<std::flat_multimap<long, int>>();
+  test<std::flat_multimap<double, int>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if_exceptions.pass.cpp
new file mode 100644
index 00000000000000..d691535c248a09
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.erasure/erase_if_exceptions.pass.cpp
@@ -0,0 +1,157 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class Key, class T, class Compare, class KeyContainer, class MappedContainer, class Predicate>
+//   typename flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+//   erase_if(flat_multimap<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+// If any member function in [flat.multimap.defn] exits via an exception, the invariant is restored.
+// (This is not a member function, but let's respect the invariant anyway.)
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+// Global countdown used to inject an exception at a chosen operation count:
+// each tick() decrements c1, and when it reaches zero, c1 is reloaded from c2
+// (arming a second, later throw point), `throws` is incremented, and 42 is
+// thrown.
+struct Counter {
+  int c1, c2, throws;
+  void tick() {
+    c1 -= 1;
+    if (c1 == 0) {
+      c1 = c2;
+      throws += 1;
+      throw 42;
+    }
+  }
+};
+Counter g_counter = {0, 0, 0};
+
+// int-like wrapper whose copy-assignment ticks the global counter both before
+// and after writing, so an injected throw can leave the assignment either
+// not-yet-done or already-done.
+struct ThrowingAssignment {
+  ThrowingAssignment(int i) : i_(i) {}
+  ThrowingAssignment(const ThrowingAssignment&) = default;
+  ThrowingAssignment& operator=(const ThrowingAssignment& rhs) {
+    g_counter.tick();
+    i_ = rhs.i_;
+    g_counter.tick();
+    return *this;
+  }
+  operator int() const { return i_; }
+  int i_;
+};
+
+// Comparator that ticks the global counter before every comparison, so the
+// injected exception can also surface from inside the ordering predicate.
+struct ThrowingComparator {
+  bool operator()(const ThrowingAssignment& a, const ThrowingAssignment& b) const {
+    g_counter.tick();
+    return a.i_ < b.i_;
+  }
+};
+
+// Selects elements whose key is in the closed range [3, 5] for erasure.
+struct ErasurePredicate {
+  bool operator()(const auto& x) const { return (3 <= x.first && x.first <= 5); }
+};
+
+int main(int, char**) {
+  // Strategy: repeatedly run erase_if while arming g_counter so that the
+  // first_throw-th tick (and then the second_throw-th after re-arming) throws.
+  // After catching, the container invariant must be restored; the loops sweep
+  // every possible throw point until a run completes without throwing.
+  const std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {3, 3}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+  {
+    // Throwing key type.
+    using M = std::flat_multimap<ThrowingAssignment, int, ThrowingComparator>;
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter = {0, 0, 0};
+        M m       = M({1, 2, 3, 3, 5, 6, 7, 8}, {1, 2, 3, 3, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0};
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done"
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8));
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  {
+    // Throwing mapped type.
+    using M = std::flat_multimap<int, ThrowingAssignment, ThrowingComparator>;
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter = {0, 0, 0};
+        M m       = M({1, 2, 3, 3, 5, 6, 7, 8}, {1, 2, 3, 3, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0};
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done"
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8));
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  {
+    // Throwing key type stored in a deque (segmented) container.
+    using M =
+        std::flat_multimap<ThrowingAssignment, int, ThrowingComparator, std::deque<ThrowingAssignment>, std::deque<int>>;
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter                                = {0, 0, 0};
+        std::deque<ThrowingAssignment> container = {5, 6, 7, 8};
+        container.insert(container.begin(), {1, 2, 3, 3});
+        M m = M(std::move(container), {1, 2, 3, 3, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0};
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done"
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8));
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator.pass.cpp
new file mode 100644
index 00000000000000..c1285955e5db6d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator.pass.cpp
@@ -0,0 +1,105 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+//       iterator begin()   noexcept;
+// const_iterator begin()   const noexcept
+//       iterator end()     noexcept;
+// const_iterator end()     const noexcept;
+//
+// const_iterator cbegin()  const noexcept;
+// const_iterator cend()    const noexcept;
+
+#include <cassert>
+#include <cstddef>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Checks begin/end/cbegin/cend signatures, noexcept-ness, and that the
+// iterators support the full random-access operation set over a multimap
+// containing duplicate keys.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m         = {{1, 'a'}, {1, 'z'}, {2, 'b'}, {3, 'a'}, {3, 'b'}, {3, 'c'}, {4, 'd'}};
+  const M& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.begin()), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(m.cbegin()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(cm.begin()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(m.end()), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(m.cend()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(cm.end()), typename M::const_iterator);
+  static_assert(noexcept(m.begin()));
+  static_assert(noexcept(cm.begin()));
+  static_assert(noexcept(m.cbegin()));
+  static_assert(noexcept(m.end()));
+  static_assert(noexcept(cm.end()));
+  static_assert(noexcept(m.cend()));
+  assert(m.size() == 7);
+  assert(std::distance(m.begin(), m.end()) == 7);
+  assert(std::distance(cm.begin(), cm.end()) == 7);
+  assert(std::distance(m.cbegin(), m.cend()) == 7);
+  typename M::iterator i;                   // default-construct
+  i                            = m.begin(); // move-assignment
+  typename M::const_iterator k = i;         // converting constructor
+  assert(i == k);                           // comparison
+  assert(i->first == 1);                    // operator->
+  assert(i->second == 'a');                 // operator->
+  ++i;                                      // pre-increment
+  assert(i->first == 1);                    // operator->
+  assert(i->second == 'z');                 // operator->
+  i = i + 3;                                // operator+
+  assert((*i).first == 3);                  // operator*
+  assert((*i).second == 'b');               // operator*
+  i += 3;                                   // operator+=
+  assert(i == m.end());                     // operator==
+  --i;                                      // pre-decrement
+  assert(i->first == 4);                    // operator->
+  assert(i->second == 'd');                 // operator->
+  i = i - 2;                                // operator-
+  assert(i->first == 3);                    // operator->
+  assert(i->second == 'b');                 // operator->
+  i -= 2;                                   // operator-=
+  assert(i > m.begin());                    // operator>
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  {
+    // N3644 testing: value-initialized iterators must compare equal to each
+    // other (and to value-initialized const_iterators).
+    using C = std::flat_multimap<int, char>;
+    C::iterator ii1{}, ii2{};
+    C::iterator ii4 = ii1;
+    C::const_iterator cii{};
+    assert(ii1 == ii2);
+    assert(ii1 == ii4);
+    assert(!(ii1 != ii2));
+
+    assert((ii1 == cii));
+    assert((cii == ii1));
+    assert(!(ii1 != cii));
+    assert(!(cii != ii1));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_comparison.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_comparison.pass.cpp
new file mode 100644
index 00000000000000..f1b2cad743e23f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_comparison.pass.cpp
@@ -0,0 +1,155 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_multimap iterators should be C++20 random access iterators
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Verifies the full relational-operator matrix (==, !=, <, <=, >, >=) for
+// iterator, const_iterator, and both reverse flavors, plus <=> when the
+// underlying key-container iterator supports three-way comparison.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using KI    = typename KeyContainer::iterator;
+  using I     = M::iterator;
+  using CI    = M::const_iterator;
+  using RI    = M::reverse_iterator;
+  using CRI   = M::const_reverse_iterator;
+
+  static_assert(std::equality_comparable<I>);
+  static_assert(std::equality_comparable<CI>);
+  static_assert(std::equality_comparable<RI>);
+  static_assert(std::equality_comparable<CRI>);
+
+  static_assert(std::totally_ordered<I>);
+  static_assert(std::totally_ordered<CI>);
+  static_assert(std::totally_ordered<RI>);
+  static_assert(std::totally_ordered<CRI>);
+
+  M m = {{1, 'a'}, {2, 'b'}, {2, 'e'}, {3, 'z'}, {3, 'y'}, {3, 'c'}, {4, 'd'}};
+
+  I i1 = m.begin();
+  I i2 = m.begin() + 1;
+
+  assert(i1 == i1);
+  assert(!(i1 != i1));
+  assert(i1 != i2);
+  assert(!(i1 == i2));
+  assert(i1 < i2);
+  assert(!(i1 < i1));
+  assert(i1 <= i1);
+  assert(i1 <= i2);
+  assert(!(i2 <= i1));
+  assert(i2 > i1);
+  assert(!(i2 > i2));
+  assert(i2 >= i1);
+  assert(i2 >= i2);
+  assert(!(i1 >= i2));
+
+  CI ci1 = m.cbegin();
+  CI ci2 = m.cbegin() + 1;
+  assert(ci1 == ci1);
+  assert(!(ci1 != ci1));
+  assert(ci1 != ci2);
+  assert(!(ci1 == ci2));
+  assert(ci1 < ci2);
+  assert(!(ci1 < ci1));
+  assert(ci1 <= ci1);
+  assert(ci1 <= ci2);
+  assert(!(ci2 <= ci1));
+  assert(ci2 > ci1);
+  assert(!(ci2 > ci2));
+  assert(ci2 >= ci1);
+  assert(ci2 >= ci2);
+  assert(!(ci1 >= ci2));
+
+  RI ri1 = m.rbegin();
+  RI ri2 = m.rbegin() + 1;
+  assert(ri1 == ri1);
+  assert(!(ri1 != ri1));
+  assert(ri1 != ri2);
+  assert(!(ri1 == ri2));
+  assert(ri1 < ri2);
+  assert(!(ri1 < ri1));
+  assert(ri1 <= ri1);
+  assert(ri1 <= ri2);
+  assert(!(ri2 <= ri1));
+  assert(ri2 > ri1);
+  assert(!(ri2 > ri2));
+  assert(ri2 >= ri1);
+  assert(ri2 >= ri2);
+  assert(!(ri1 >= ri2));
+
+  CRI cri1 = m.crbegin();
+  CRI cri2 = m.crbegin() + 1;
+  assert(cri1 == cri1);
+  assert(!(cri1 != cri1));
+  assert(cri1 != cri2);
+  assert(!(cri1 == cri2));
+  assert(cri1 < cri2);
+  assert(!(cri1 < cri1));
+  assert(cri1 <= cri1);
+  assert(cri1 <= cri2);
+  assert(!(cri2 <= cri1));
+  assert(cri2 > cri1);
+  assert(!(cri2 > cri2));
+  assert(cri2 >= cri1);
+  assert(cri2 >= cri2);
+  assert(!(cri1 >= cri2));
+
+  if constexpr (std::three_way_comparable<KI>) {
+    static_assert(std::three_way_comparable<I>); // ...of course the wrapped iterators still support <=>.
+    static_assert(std::three_way_comparable<CI>);
+    static_assert(std::three_way_comparable<RI>);
+    static_assert(std::three_way_comparable<CRI>);
+    static_assert(std::same_as<decltype(I() <=> I()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(I() <=> CI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(CI() <=> CI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(RI() <=> RI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(RI() <=> CRI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(CRI() <=> CRI()), std::strong_ordering>);
+
+    assert(i1 <=> i1 == std::strong_ordering::equivalent);
+    assert(i1 <=> i2 == std::strong_ordering::less);
+    assert(i2 <=> i1 == std::strong_ordering::greater);
+
+    assert(ci1 <=> ci1 == std::strong_ordering::equivalent);
+    assert(ci1 <=> ci2 == std::strong_ordering::less);
+    assert(ci2 <=> ci1 == std::strong_ordering::greater);
+
+    assert(ri1 <=> ri1 == std::strong_ordering::equivalent);
+    assert(ri1 <=> ri2 == std::strong_ordering::less);
+    assert(ri2 <=> ri1 == std::strong_ordering::greater);
+
+    assert(cri1 <=> cri1 == std::strong_ordering::equivalent);
+    assert(cri1 <=> cri2 == std::strong_ordering::less);
+    assert(cri2 <=> cri1 == std::strong_ordering::greater);
+  }
+}
+
+int main(int, char**) {
+  // Cover contiguous, segmented, minimal, and fancy-pointer containers.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_concept_conformance.compile.pass.cpp
new file mode 100644
index 00000000000000..ce578e4def92b5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/iterator_concept_conformance.compile.pass.cpp
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator, const_iterator, reverse_iterator, const_reverse_iterator
+
+#include <flat_map>
+#include <deque>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <vector>
+#include <type_traits>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Static checks that all four iterator aliases model random_access_iterator
+// (but not contiguous_iterator), are read-only, pair up correctly as
+// sentinels, and are movable-storable into pair<int, char>*.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using C     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using I     = C::iterator;
+  using CI    = C::const_iterator;
+  using RI    = C::reverse_iterator;
+  using CRI   = C::const_reverse_iterator;
+  static_assert(std::random_access_iterator<I>);
+  static_assert(std::random_access_iterator<CI>);
+  static_assert(std::random_access_iterator<RI>);
+  static_assert(std::random_access_iterator<CRI>);
+  static_assert(!std::contiguous_iterator<I>);
+  static_assert(!std::contiguous_iterator<CI>);
+  static_assert(!std::contiguous_iterator<RI>);
+  static_assert(!std::contiguous_iterator<CRI>);
+  static_assert(!std::indirectly_writable<I, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<CI, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<RI, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<CRI, std::pair<int, char>>);
+  static_assert(std::sentinel_for<I, I>);
+  static_assert(std::sentinel_for<I, CI>);
+  static_assert(!std::sentinel_for<I, RI>);
+  static_assert(!std::sentinel_for<I, CRI>);
+  static_assert(std::sentinel_for<CI, I>);
+  static_assert(std::sentinel_for<CI, CI>);
+  static_assert(!std::sentinel_for<CI, RI>);
+  static_assert(!std::sentinel_for<CI, CRI>);
+  static_assert(!std::sentinel_for<RI, I>);
+  static_assert(!std::sentinel_for<RI, CI>);
+  static_assert(std::sentinel_for<RI, RI>);
+  static_assert(std::sentinel_for<RI, CRI>);
+  static_assert(!std::sentinel_for<CRI, I>);
+  static_assert(!std::sentinel_for<CRI, CI>);
+  static_assert(std::sentinel_for<CRI, RI>);
+  static_assert(std::sentinel_for<CRI, CRI>);
+  static_assert(std::indirectly_movable_storable<I, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<CI, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<RI, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<CRI, std::pair<int, char>*>);
+
+#ifdef _LIBCPP_VERSION
+  // The iterator_category of libc++'s implementation is checked separately,
+  // since the standard does not mandate it for other implementations.
+  static_assert(std::is_same_v<typename std::iterator_traits<I>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<CI>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<RI>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<CRI>::iterator_category, std::random_access_iterator_tag>);
+#endif
+}
+
+// Non-template overload that forces the instantiations; this is a
+// compile-only test (.compile.pass.cpp), so nothing is executed.
+void test() {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/range_concept_conformance.compile.pass.cpp
new file mode 100644
index 00000000000000..979c0b090fd665
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/range_concept_conformance.compile.pass.cpp
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <string>
+#include <vector>
+#include "MinSequenceContainer.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  {
+    using Key   = typename KeyContainer::value_type;
+    using Value = typename ValueContainer::value_type;
+    using C     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+    static_assert(std::same_as<std::ranges::iterator_t<C>, typename C::iterator>);
+    static_assert(std::ranges::random_access_range<C>);
+    static_assert(!std::ranges::contiguous_range<C>);
+    static_assert(std::ranges::common_range<C>);
+    static_assert(std::ranges::input_range<C>);
+    static_assert(!std::ranges::view<C>);
+    static_assert(std::ranges::sized_range<C>);
+    static_assert(!std::ranges::borrowed_range<C>);
+    static_assert(std::ranges::viewable_range<C>);
+
+    static_assert(std::same_as<std::ranges::iterator_t<const C>, typename C::const_iterator>);
+    static_assert(std::ranges::random_access_range<const C>);
+    static_assert(!std::ranges::contiguous_range<const C>);
+    static_assert(std::ranges::common_range<const C>);
+    static_assert(std::ranges::input_range<const C>);
+    static_assert(!std::ranges::view<const C>);
+    static_assert(std::ranges::sized_range<const C>);
+    static_assert(!std::ranges::borrowed_range<const C>);
+    static_assert(!std::ranges::viewable_range<const C>);
+  }
+}
+
+void test() {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/reverse_iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/reverse_iterator.pass.cpp
new file mode 100644
index 00000000000000..8c1e5451f703fb
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.iterators/reverse_iterator.pass.cpp
@@ -0,0 +1,92 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+//       reverse_iterator rbegin() noexcept;
+// const_reverse_iterator rbegin() const noexcept;
+//       reverse_iterator rend()   noexcept;
+// const_reverse_iterator rend()   const noexcept;
+//
+// const_reverse_iterator crbegin() const noexcept;
+// const_reverse_iterator crend()   const noexcept;
+
+#include <cassert>
+#include <cstddef>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include <iterator>
+
+#include "test_macros.h"
+
+
+int main(int, char**) {
+  {
+    using M     = std::flat_multimap<int, char, std::less<int>, std::deque<int>, std::deque<char>>;
+    M m         = {{1, 'a'}, {1, 'b'}, {2, 'c'}, {2, 'd'}, {3, 'e'}, {3, 'f'}, {4, 'g'}, {4, 'h'}};
+    const M& cm = m;
+    ASSERT_SAME_TYPE(decltype(m.rbegin()), M::reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.crbegin()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(cm.rbegin()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.rend()), M::reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.crend()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(cm.rend()), M::const_reverse_iterator);
+    static_assert(noexcept(m.rbegin()));
+    static_assert(noexcept(cm.rbegin()));
+    static_assert(noexcept(m.crbegin()));
+    static_assert(noexcept(m.rend()));
+    static_assert(noexcept(cm.rend()));
+    static_assert(noexcept(m.crend()));
+    assert(m.size() == 8);
+    assert(std::distance(m.rbegin(), m.rend()) == 8);
+    assert(std::distance(cm.rbegin(), cm.rend()) == 8);
+    assert(std::distance(m.crbegin(), m.crend()) == 8);
+    assert(std::distance(cm.crbegin(), cm.crend()) == 8);
+    M::reverse_iterator i; // default-construct
+    ASSERT_SAME_TYPE(decltype(i->first), const int&);
+    ASSERT_SAME_TYPE(decltype(i->second), char&);
+    i                           = m.rbegin(); // move-assignment
+    M::const_reverse_iterator k = i;          // converting constructor
+    assert(i == k);                           // comparison
+    for (int j = 8; j >= 1; --j, ++i) {       // pre-increment
+      assert(i->first == (j + 1) / 2);        // operator->
+      assert(i->second == 'a' + j - 1);
+    }
+    assert(i == m.rend());
+    for (int j = 1; j <= 8; ++j) {
+      --i; // pre-decrement
+      assert((*i).first == (j + 1) / 2);
+      assert((*i).second == 'a' + j - 1);
+    }
+    assert(i == m.rbegin());
+  }
+  {
+    // N3644 testing
+    using C = std::flat_multimap<int, char>;
+    C::reverse_iterator ii1{}, ii2{};
+    C::reverse_iterator ii4 = ii1;
+    C::const_reverse_iterator cii{};
+    assert(ii1 == ii2);
+    assert(ii1 == ii4);
+    assert(!(ii1 != ii2));
+
+    assert((ii1 == cii));
+    assert((cii == ii1));
+    assert(!(ii1 != cii));
+    assert(!(cii != ii1));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/clear.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/clear.pass.cpp
new file mode 100644
index 00000000000000..5b0788b6826fd4
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/clear.pass.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// void clear() noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// test noexcept
+
+template <class T>
+concept NoExceptClear = requires(T t) {
+  { t.clear() } noexcept;
+};
+
+static_assert(NoExceptClear<std::flat_multimap<int, int>>);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert(
+    NoExceptClear<std::flat_multimap<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m = {{5, 2}, {2, 1}, {2, 3}, {2, 1}, {5, 0}};
+  assert(m.size() == 5);
+  ASSERT_NOEXCEPT(m.clear());
+  ASSERT_SAME_TYPE(decltype(m.clear()), void);
+  m.clear();
+  assert(m.size() == 0);
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace.pass.cpp
new file mode 100644
index 00000000000000..9ef0c26e54ba34
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace.pass.cpp
@@ -0,0 +1,158 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template <class... Args>
+// iterator emplace(Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <tuple>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../Emplaceable.h"
+#include "DefaultOnly.h"
+#include "min_allocator.h"
+
+// Constraints: is_constructible_v<pair<key_type, mapped_type>, Args...> is true.
+template <class M, class... Args>
+concept CanEmplace = requires(M m, Args&&... args) { m.emplace(std::forward<Args>(args)...); };
+
+using Map = std::flat_multimap<Emplaceable, Emplaceable>;
+static_assert(CanEmplace<Map>);
+static_assert(CanEmplace<Map, Emplaceable, Emplaceable>);
+static_assert(CanEmplace<Map, std::piecewise_construct_t, std::tuple<int, double>, std::tuple<int, double>>);
+static_assert(!CanEmplace<Map, Emplaceable>);
+static_assert(!CanEmplace<Map, int, double>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = typename M::iterator;
+
+  {
+    // was empty
+    M m;
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 3.5));
+    assert(r == m.begin());
+    assert(m.size() == 1);
+    assert(r->first == 2);
+    assert(r->second == 3.5);
+  }
+  {
+    // key does not exist and inserted at the beginning
+    M m                              = {{3, 4.0}, {3, 3.0}, {3, 1.0}, {7, 0.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin());
+    assert(m.size() == 5);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // key does not exist and inserted in the middle
+    M m                              = {{1, 4.0}, {1, 3.0}, {3, 1.0}, {4, 0.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 5);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // key does not exist and inserted at the end
+    M m                              = {{1, 4.0}, {1, 3.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 3);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // key already exists and original at the beginning
+    M m                              = {{2, 4.0}, {2, 3.0}, {5, 1.0}, {6, 0.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 5);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // key already exists and original in the middle
+    M m                              = {{0, 4.0}, {2, 3.0}, {2, 1.0}, {4, 0.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 3);
+    assert(m.size() == 5);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // key already exists and original at the end
+    M m                              = {{0, 4.0}, {1, 3.0}, {2, 1.0}};
+    std::same_as<R> decltype(auto) r = m.emplace(typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 3);
+    assert(m.size() == 4);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_emplaceable() {
+  using M = std::flat_multimap<int, Emplaceable, std::less<int>, KeyContainer, ValueContainer>;
+  using R = typename M::iterator;
+
+  M m;
+  std::same_as<R> decltype(auto) r =
+      m.emplace(std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple());
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == Emplaceable());
+  r = m.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(m.begin()->first == 1);
+  assert(m.begin()->second == Emplaceable(2, 3.5));
+  r = m.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(r == m.begin() + 1);
+  assert(m.size() == 3);
+  assert(m.begin()->first == 1);
+  assert(m.begin()->second == Emplaceable(2, 3.5));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_emplaceable<std::vector<int>, std::vector<Emplaceable>>();
+  test_emplaceable<std::deque<int>, std::vector<Emplaceable>>();
+  test_emplaceable<MinSequenceContainer<int>, MinSequenceContainer<Emplaceable>>();
+  test_emplaceable<std::vector<int, min_allocator<int>>, std::vector<Emplaceable, min_allocator<Emplaceable>>>();
+
+  {
+    auto emplace_func = [](auto& m, auto key_arg, auto value_arg) {
+      m.emplace(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+    };
+    test_emplace_exception_guarantee(emplace_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace_hint.pass.cpp
new file mode 100644
index 00000000000000..588d27ea54f4d6
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/emplace_hint.pass.cpp
@@ -0,0 +1,228 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template <class... Args>
+//   iterator emplace_hint(const_iterator position, Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../../../Emplaceable.h"
+#include "DefaultOnly.h"
+#include "min_allocator.h"
+#include "../helpers.h"
+
+#if defined(_LIBCPP_VERSION)
+// The standard only constrains `emplace(Args&&...)` on is_constructible_v<pair<key_type, mapped_type>, Args...>;
+// it says nothing about emplace_hint, so this constraint check is libc++-specific.
+template <class M, class... Args>
+concept CanEmplaceHint =
+    requires(M m, typename M::const_iterator i, Args&&... args) { m.emplace_hint(i, std::forward<Args>(args)...); };
+
+using Map = std::flat_multimap<Emplaceable, Emplaceable>;
+static_assert(CanEmplaceHint<Map>);
+static_assert(CanEmplaceHint<Map, Emplaceable, Emplaceable>);
+static_assert(CanEmplaceHint<Map, std::piecewise_construct_t, std::tuple<int, double>, std::tuple<int, double>>);
+static_assert(!CanEmplaceHint<Map, Emplaceable>);
+static_assert(!CanEmplaceHint<Map, int, double>);
+#endif
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = M::iterator;
+  {
+    // was empty
+    M m;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(m.end(), typename M::value_type(2, 3.5));
+    assert(r == m.begin());
+    assert(m.size() == 1);
+    assert(m.begin()->first == 2);
+    assert(m.begin()->second == 3.5);
+  }
+  {
+    // hint correct and no duplicates
+    M m                              = {{0, 0.0}, {1, 1.0}, {3, 3.0}};
+    auto it                          = m.begin() + 2;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 4);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint correct and at the beginning
+    M m                              = {{3, 3.0}, {4, 4.0}};
+    auto it                          = m.begin();
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin());
+    assert(m.size() == 3);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint correct and at the end
+    M m                              = {{0, 0.0}, {1, 1.0}};
+    auto it                          = m.end();
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 3);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint correct and at first duplicate
+    M m                              = {{0, 0.0}, {1, 1.0}, {2, 1.9}, {2, 2.1}, {3, 3.0}};
+    auto it                          = m.begin() + 2;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 6);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+    assert(std::next(r)->first == 2);
+    assert(std::next(r)->second == 1.9);
+  }
+  {
+    // hint correct and in-between duplicates
+    M m                              = {{0, 0.0}, {1, 1.0}, {2, 1.8}, {2, 1.9}, {2, 2.1}, {3, 3.0}};
+    auto it                          = m.begin() + 4;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 4);
+    assert(m.size() == 7);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+    assert(std::next(r)->first == 2);
+    assert(std::next(r)->second == 2.1);
+  }
+  {
+    // hint correct and after duplicates
+    M m                              = {{0, 0.0}, {1, 1.0}, {2, 1.8}, {2, 1.9}, {2, 2.1}, {3, 3.0}};
+    auto it                          = m.begin() + 5;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 5);
+    assert(m.size() == 7);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+    assert(std::next(r)->first == 3);
+    assert(std::next(r)->second == 3.0);
+  }
+  {
+    // hint incorrect and no duplicates
+    M m                              = {{0, 0.0}, {1, 1.0}, {3, 3.0}};
+    auto it                          = m.begin() + 1;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 4);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint incorrect and at the beginning
+    M m                              = {{0, 0.0}, {1, 1.0}};
+    auto it                          = m.begin();
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin() + 2);
+    assert(m.size() == 3);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint incorrect and at the end
+    M m                              = {{3, 3.0}, {4, 4.0}};
+    auto it                          = m.end();
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    assert(r == m.begin());
+    assert(m.size() == 3);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+  }
+  {
+    // hint incorrect and before the first duplicate
+    M m                              = {{0, 0.0}, {1, 1.0}, {2, 1.8}, {2, 1.9}, {2, 2.1}, {3, 3.0}};
+    auto it                          = m.begin();
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    // the result is as left as possible
+    assert(r == m.begin() + 2);
+    assert(m.size() == 7);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+    assert(std::next(r)->first == 2);
+    assert(std::next(r)->second == 1.8);
+  }
+  {
+    // hint incorrect and after the last duplicate
+    M m                              = {{0, 0.0}, {1, 1.0}, {2, 1.8}, {2, 1.9}, {2, 2.1}, {3, 3.0}, {4, 4.0}};
+    auto it                          = m.begin() + 6;
+    std::same_as<R> decltype(auto) r = m.emplace_hint(it, typename M::value_type(2, 2.0));
+    // the result is as right as possible
+    assert(r == m.begin() + 5);
+    assert(m.size() == 8);
+    assert(r->first == 2);
+    assert(r->second == 2.0);
+    assert(std::next(r)->first == 3);
+    assert(std::next(r)->second == 3.0);
+  }
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_emplaceable() {
+  using M = std::flat_multimap<int, Emplaceable, std::less<int>, KeyContainer, ValueContainer>;
+  using R = M::iterator;
+
+  M m;
+  ASSERT_SAME_TYPE(decltype(m.emplace_hint(m.cbegin())), R);
+  R r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple());
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == Emplaceable());
+  r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == Emplaceable(2, 3.5));
+  r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.6));
+  assert(r == m.begin() + 1);
+  assert(m.size() == 3);
+  assert(r->first == 1);
+  assert(r->second == Emplaceable(2, 3.6));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_emplaceable<std::vector<int>, std::vector<Emplaceable>>();
+  test_emplaceable<std::deque<int>, std::vector<Emplaceable>>();
+  test_emplaceable<MinSequenceContainer<int>, MinSequenceContainer<Emplaceable>>();
+  test_emplaceable<std::vector<int, min_allocator<int>>, std::vector<Emplaceable, min_allocator<Emplaceable>>>();
+
+  {
+    auto emplace_func = [](auto& m, auto key_arg, auto value_arg) {
+      m.emplace_hint(m.begin(), std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+    };
+    test_emplace_exception_guarantee(emplace_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter.pass.cpp
new file mode 100644
index 00000000000000..78040be2e043d5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter.pass.cpp
@@ -0,0 +1,127 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator erase(iterator position);
+// iterator erase(const_iterator position);
+
+#include <algorithm>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using I     = M::iterator;
+
+  P ar[] = {
+      P(1, 1.5),
+      P(2, 2.5),
+      P(2, 2.6),
+      P(3, 3.5),
+      P(4, 4.5),
+      P(4, 4.5),
+      P(4, 4.7),
+      P(5, 5.5),
+      P(6, 6.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  M m(ar, ar + sizeof(ar) / sizeof(ar[0]));
+  assert(m.size() == 11);
+  std::same_as<I> decltype(auto) i1 = m.erase(std::next(m.cbegin(), 2));
+  assert(m.size() == 10);
+  assert(i1 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(
+      m,
+      std::vector<P>{
+          {1, 1.5}, {2, 2.5}, {3, 3.5}, {4, 4.5}, {4, 4.5}, {4, 4.7}, {5, 5.5}, {6, 6.5}, {7, 7.5}, {8, 8.5}}));
+
+  std::same_as<I> decltype(auto) i2 = m.erase(std::next(m.begin(), 0));
+  assert(m.size() == 9);
+  assert(i2 == m.begin());
+  assert(std::ranges::equal(
+      m, std::vector<P>{{2, 2.5}, {3, 3.5}, {4, 4.5}, {4, 4.5}, {4, 4.7}, {5, 5.5}, {6, 6.5}, {7, 7.5}, {8, 8.5}}));
+
+  std::same_as<I> decltype(auto) i3 = m.erase(std::next(m.cbegin(), 8));
+  assert(m.size() == 8);
+  assert(i3 == m.end());
+  assert(std::ranges::equal(
+      m, std::vector<P>{{2, 2.5}, {3, 3.5}, {4, 4.5}, {4, 4.5}, {4, 4.7}, {5, 5.5}, {6, 6.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i4 = m.erase(std::next(m.begin(), 1));
+  assert(m.size() == 7);
+  assert(i4 == std::next(m.begin()));
+  assert(std::ranges::equal(m, std::vector<P>{{2, 2.5}, {4, 4.5}, {4, 4.5}, {4, 4.7}, {5, 5.5}, {6, 6.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i5 = m.erase(std::next(m.cbegin(), 2));
+  assert(m.size() == 6);
+  assert(i5 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(m, std::vector<P>{{2, 2.5}, {4, 4.5}, {4, 4.7}, {5, 5.5}, {6, 6.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i6 = m.erase(std::next(m.begin(), 2));
+  assert(m.size() == 5);
+  assert(i6 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(m, std::vector<P>{{2, 2.5}, {4, 4.5}, {5, 5.5}, {6, 6.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i7 = m.erase(std::next(m.cbegin(), 0));
+  assert(m.size() == 4);
+  assert(i7 == std::next(m.begin(), 0));
+  assert(std::ranges::equal(m, std::vector<P>{{4, 4.5}, {5, 5.5}, {6, 6.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i8 = m.erase(std::next(m.cbegin(), 2));
+  assert(m.size() == 3);
+  assert(i8 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(m, std::vector<P>{{4, 4.5}, {5, 5.5}, {7, 7.5}}));
+
+  std::same_as<I> decltype(auto) i9 = m.erase(std::next(m.cbegin(), 2));
+  assert(m.size() == 2);
+  assert(i9 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(m, std::vector<P>{{4, 4.5}, {5, 5.5}}));
+
+  std::same_as<I> decltype(auto) i10 = m.erase(m.cbegin());
+  assert(m.size() == 1);
+  assert(i10 == m.cbegin());
+  assert(std::ranges::equal(m, std::vector<P>{{5, 5.5}}));
+
+  std::same_as<I> decltype(auto) i11 = m.erase(m.begin());
+  assert(m.size() == 0);
+  assert(i11 == m.begin());
+  assert(i11 == m.end());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto erase_function = [](auto& m, auto) { m.erase(m.begin() + 2); };
+    test_erase_exception_guarantee(erase_function);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter_iter.pass.cpp
new file mode 100644
index 00000000000000..103f38c1c5d4a1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_iter_iter.pass.cpp
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator erase(const_iterator first, const_iterator last);
+
+#include <algorithm>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using I     = M::iterator;
+
+  P ar[] = {
+      P(1, 1.5),
+      P(2, 2.5),
+      P(2, 2.6),
+      P(3, 3.5),
+      P(3, 3.6),
+      P(3, 3.7),
+      P(4, 4.5),
+      P(5, 5.5),
+      P(6, 6.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  M m(ar, ar + sizeof(ar) / sizeof(ar[0]));
+  assert(m.size() == 11);
+  std::same_as<I> decltype(auto) i1 = m.erase(m.cbegin(), m.cbegin());
+  assert(m.size() == 11);
+  assert(i1 == m.begin());
+  assert(std::ranges::equal(
+      m,
+      std::vector<P>{
+          {1, 1.5},
+          {2, 2.5},
+          {2, 2.6},
+          {3, 3.5},
+          {3, 3.6},
+          {3, 3.7},
+          {4, 4.5},
+          {5, 5.5},
+          {6, 6.5},
+          {7, 7.5},
+          {8, 8.5}}));
+
+  std::same_as<I> decltype(auto) i2 = m.erase(m.cbegin(), std::next(m.cbegin(), 2));
+  assert(m.size() == 9);
+  assert(i2 == m.begin());
+  assert(std::ranges::equal(
+      m, std::vector<P>{{2, 2.6}, {3, 3.5}, {3, 3.6}, {3, 3.7}, {4, 4.5}, {5, 5.5}, {6, 6.5}, {7, 7.5}, {8, 8.5}}));
+
+  std::same_as<I> decltype(auto) i3 = m.erase(std::next(m.cbegin(), 2), std::next(m.cbegin(), 6));
+  assert(m.size() == 5);
+  assert(i3 == std::next(m.begin(), 2));
+  assert(std::ranges::equal(m, std::vector<P>{{2, 2.6}, {3, 3.5}, {6, 6.5}, {7, 7.5}, {8, 8.5}}));
+
+  std::same_as<I> decltype(auto) i4 = m.erase(m.cbegin(), m.cend());
+  assert(m.size() == 0);
+  assert(i4 == m.begin());
+  assert(i4 == m.end());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto erase_function = [](auto& m, auto) { m.erase(m.begin(), m.begin() + 2); };
+    test_erase_exception_guarantee(erase_function);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key.pass.cpp
new file mode 100644
index 00000000000000..7944996fba1a04
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key.pass.cpp
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// size_type erase(const key_type& k);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer, class Compare = std::less<>>
+void test() {
+  using M = std::flat_multimap<int, char, Compare, KeyContainer, ValueContainer>;
+
+  auto make = [](std::initializer_list<int> il) {
+    M m;
+    for (int i : il) {
+      m.emplace(i, i);
+    }
+    return m;
+  };
+  M m = make({1, 1, 2, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 8, 8, 9});
+  ASSERT_SAME_TYPE(decltype(m.erase(9)), typename M::size_type);
+  auto n = m.erase(10);
+  assert(n == 0);
+  assert(m == make({1, 1, 2, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 8, 8, 9}));
+  n = m.erase(4);
+  assert(n == 1);
+  assert(m == make({1, 1, 2, 2, 2, 3, 5, 5, 6, 7, 8, 8, 8, 8, 9}));
+  n = m.erase(1);
+  assert(n == 2);
+  assert(m == make({2, 2, 2, 3, 5, 5, 6, 7, 8, 8, 8, 8, 9}));
+  n = m.erase(8);
+  assert(n == 4);
+  assert(m == make({2, 2, 2, 3, 5, 5, 6, 7, 9}));
+  n = m.erase(3);
+  assert(n == 1);
+  assert(m == make({2, 2, 2, 5, 5, 6, 7, 9}));
+  n = m.erase(4);
+  assert(n == 0);
+  assert(m == make({2, 2, 2, 5, 5, 6, 7, 9}));
+  n = m.erase(6);
+  assert(n == 1);
+  assert(m == make({2, 2, 2, 5, 5, 7, 9}));
+  n = m.erase(7);
+  assert(n == 1);
+  assert(m == make({2, 2, 2, 5, 5, 9}));
+  n = m.erase(2);
+  assert(n == 3);
+  assert(m == make({5, 5, 9}));
+  n = m.erase(5);
+  assert(n == 2);
+  assert(m == make({9}));
+  n = m.erase(9);
+  assert(n == 1);
+  assert(m.empty());
+  n = m.erase(1);
+  assert(n == 0);
+  assert(m.empty());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::vector<int>, std::vector<char>, std::greater<>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  {
+    auto erase_function = [](auto& m, auto key_arg) {
+      using Map = std::decay_t<decltype(m)>;
+      using Key = typename Map::key_type;
+      const Key key{key_arg};
+      m.erase(key);
+    };
+    test_erase_exception_guarantee(erase_function);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key_transparent.pass.cpp
new file mode 100644
index 00000000000000..75a2d205b8f87c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/erase_key_transparent.pass.cpp
@@ -0,0 +1,161 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// size_type erase(K&& k);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M>
+concept CanErase        = requires(M m, Transparent<int> k) { m.erase(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanErase<TransparentMap>);
+static_assert(!CanErase<const TransparentMap>);
+static_assert(!CanErase<NonTransparentMap>);
+static_assert(!CanErase<const NonTransparentMap>);
+
+template <class Key, class It>
+struct HeterogeneousKey {
+  explicit HeterogeneousKey(Key key, It it) : key_(key), it_(it) {}
+  operator It() && { return it_; }
+  auto operator<=>(Key key) const { return key_ <=> key; }
+  friend bool operator<(const HeterogeneousKey&, const HeterogeneousKey&) {
+    assert(false);
+    return false;
+  }
+  Key key_;
+  It it_;
+};
+
+template <class KeyContainer, class ValueContainer>
+void test_simple() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::ranges::less, KeyContainer, ValueContainer>;
+
+  M m = {{1, 1}, {2, 2}, {2, 2}, {3, 3}, {3, 4}, {3, 5}, {4, 4}};
+  ASSERT_SAME_TYPE(decltype(m.erase(9)), typename M::size_type);
+  auto n = m.erase(3); // erase(K&&) [with K=int]
+  assert(n == 3);
+  assert((m == M{{1, 1}, {2, 2}, {2, 2}, {4, 4}}));
+  typename M::key_type lvalue = 2;
+  n                           = m.erase(lvalue); // erase(K&&) [with K=int&]
+  assert(n == 2);
+  assert((m == M{{1, 1}, {4, 4}}));
+  const typename M::key_type const_lvalue = 1;
+  n                                       = m.erase(const_lvalue); // erase(const key_type&)
+  assert(n == 1);
+  assert((m == M{{4, 4}}));
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_transparent_comparator() {
+  using M = std::flat_multimap<std::string, int, TransparentComparator, KeyContainer, ValueContainer>;
+  using P = std::pair<std::string, int>;
+  M m     = {
+      {"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"epsilon", 4}, {"eta", 4}, {"gamma", 5}, {"gamma", 6}, {"gamma", 7}};
+  ASSERT_SAME_TYPE(decltype(m.erase(Transparent<std::string>{"abc"})), typename M::size_type);
+
+  auto n = m.erase(Transparent<std::string>{"epsilon"});
+  assert(n == 2);
+  assert(std::ranges::equal(
+      m, std::vector<P>{{"alpha", 1}, {"beta", 2}, {"eta", 4}, {"gamma", 5}, {"gamma", 6}, {"gamma", 7}}));
+
+  auto n2 = m.erase(Transparent<std::string>{"aaa"});
+  assert(n2 == 0);
+  assert(std::ranges::equal(
+      m, std::vector<P>{{"alpha", 1}, {"beta", 2}, {"eta", 4}, {"gamma", 5}, {"gamma", 6}, {"gamma", 7}}));
+
+  auto n3 = m.erase(Transparent<std::string>{"gamma"});
+  assert(n3 == 3);
+  assert(std::ranges::equal(m, std::vector<P>{{"alpha", 1}, {"beta", 2}, {"eta", 4}}));
+
+  auto n4 = m.erase(Transparent<std::string>{"alpha"});
+  assert(n4 == 1);
+  assert(std::ranges::equal(m, std::vector<P>{{"beta", 2}, {"eta", 4}}));
+
+  auto n5 = m.erase(Transparent<std::string>{"alpha"});
+  assert(n5 == 0);
+  assert(std::ranges::equal(m, std::vector<P>{{"beta", 2}, {"eta", 4}}));
+
+  auto n6 = m.erase(Transparent<std::string>{"beta"});
+  assert(n6 == 1);
+  assert(std::ranges::equal(m, std::vector<P>{{"eta", 4}}));
+
+  auto n7 = m.erase(Transparent<std::string>{"eta"});
+  assert(n7 == 1);
+  assert(std::ranges::equal(m, std::vector<P>{}));
+
+  auto n8 = m.erase(Transparent<std::string>{"eta"});
+  assert(n8 == 0);
+  assert(std::ranges::equal(m, std::vector<P>{}));
+}
+
+int main(int, char**) {
+  test_simple<std::vector<int>, std::vector<double>>();
+  test_simple<std::deque<int>, std::vector<double>>();
+  test_simple<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test_simple<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_transparent_comparator<std::vector<std::string>, std::vector<int>>();
+  test_transparent_comparator<std::deque<std::string>, std::vector<int>>();
+  test_transparent_comparator<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test_transparent_comparator<std::vector<std::string, min_allocator<std::string>>,
+                              std::vector<int, min_allocator<int>>>();
+
+  {
+    // P2077's HeterogeneousKey example
+    using M = std::flat_multimap<int, int, std::less<>>;
+    M m     = {{1, 1}, {2, 2}, {3, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {6, 6}, {7, 7}, {8, 8}, {8, 8}};
+    auto h1 = HeterogeneousKey<int, M::iterator>(8, m.begin());
+    std::same_as<M::size_type> auto n = m.erase(h1); // lvalue is not convertible to It; erase(K&&) is the best match
+    assert(n == 2);
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {6, 6}, {7, 7}}));
+    std::same_as<M::iterator> auto it = m.erase(std::move(h1)); // rvalue is convertible to It; erase(K&&) drops out
+    assert(it == m.begin());
+    assert((m == M{{2, 2}, {3, 3}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {6, 6}, {7, 7}}));
+  }
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {2, 2}, {3, 3}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto n = m.erase(Transparent<int>{3});
+    assert(n == 2);
+    assert(transparent_used);
+  }
+  {
+    auto erase_transparent = [](auto& m, auto key_arg) {
+      using Map = std::decay_t<decltype(m)>;
+      using Key = typename Map::key_type;
+      m.erase(Transparent<Key>{key_arg});
+    };
+    test_erase_exception_guarantee(erase_transparent);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/extract.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/extract.pass.cpp
new file mode 100644
index 00000000000000..f5ed4a9663a9da
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/extract.pass.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// containers extract() &&;
+
+#include <algorithm>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class T>
+concept CanExtract = requires(T&& t) { std::forward<T>(t).extract(); };
+
+static_assert(CanExtract<std::flat_multimap<int, int>&&>);
+static_assert(!CanExtract<std::flat_multimap<int, int>&>);
+static_assert(!CanExtract<std::flat_multimap<int, int> const&>);
+static_assert(!CanExtract<std::flat_multimap<int, int> const&&>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using M = std::flat_multimap<int, int, std::less<int>, KeyContainer, ValueContainer>;
+  M m     = M({1, 2, 2, 2, 3, 3}, {4, 5, 6, 7, 8, 9});
+
+  std::same_as<typename M::containers> auto containers = std::move(m).extract();
+
+  auto expected_keys   = {1, 2, 2, 2, 3, 3};
+  auto expected_values = {4, 5, 6, 7, 8, 9};
+  assert(std::ranges::equal(containers.keys, expected_keys));
+  assert(std::ranges::equal(containers.values, expected_values));
+  check_invariant(m);
+  LIBCPP_ASSERT(m.empty());
+  LIBCPP_ASSERT(m.keys().size() == 0);
+  LIBCPP_ASSERT(m.values().size() == 0);
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+  {
+    // extracted object maintains invariant if one of the underlying containers does not clear after move
+    using M = std::flat_multimap<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m     = M({1, 2, 2, 2, 3, 3}, {1, 2, 3, 4, 5, 6});
+    std::same_as<M::containers> auto containers = std::move(m).extract();
+    assert(containers.keys.size() == 6);
+    assert(containers.values.size() == 6);
+    check_invariant(m);
+    LIBCPP_ASSERT(m.empty());
+    LIBCPP_ASSERT(m.keys().size() == 0);
+    LIBCPP_ASSERT(m.values().size() == 0);
+  }
+
+  {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>;
+    using M              = std::flat_multimap<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+
+    M m;
+    m.emplace(1, 1);
+    m.emplace(1, 1);
+    try {
+      auto c = std::move(m).extract();
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we try to erase the key after value emplacement failure,
+      // and after erasure failure, we clear the flat_multimap.
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+#endif
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_cv.pass.cpp
new file mode 100644
index 00000000000000..3646595624c0b0
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_cv.pass.cpp
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator insert(const value_type& v);
+
+#include <flat_map>
+#include <deque>
+#include <cassert>
+#include <functional>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = typename M::iterator;
+  using VT    = typename M::value_type;
+  M m;
+
+  const VT v1(2, 2.5);
+  std::same_as<R> decltype(auto) r = m.insert(v1);
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2.5);
+
+  const VT v2(1, 1.5);
+  r = m.insert(v2);
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1.5);
+
+  const VT v3(3, 3.5);
+  r = m.insert(v3);
+  assert(r == m.begin()+ 2);
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3.5);
+
+  const VT v4(3, 4.5);
+  r = m.insert(v4);
+  assert(r == m.begin()+ 3);
+  assert(m.size() == 4);
+  assert(r->first == 3);
+  assert(r->second == 4.5);
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(p);
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_initializer_list.pass.cpp
new file mode 100644
index 00000000000000..098b66cc49f188
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_initializer_list.pass.cpp
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// void insert(initializer_list<value_type> il);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<int, double>;
+
+  M m = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}};
+  m.insert({
+      {4, 1},
+      {4, 1.5},
+      {4, 2},
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {2, 1},
+      {2, 1.5},
+      {2, 2},
+  });
+  assert(m.size() == 15);
+  std::vector<V> expected = {
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {2, 1},
+      {2, 1.5},
+      {2, 2},
+      {3, 1},
+      {3, 1.5},
+      {3, 2},
+      {4, 1},
+      {4, 1.5},
+      {4, 2},
+  };
+  assert(std::ranges::equal(m, expected));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, const auto& newValues) {
+      using FlatMap                        = std::decay_t<decltype(m)>;
+      using value_type                     = typename FlatMap::value_type;
+      std::initializer_list<value_type> il = {{newValues[0].first, newValues[0].second}};
+      m.insert(il);
+    };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_cv.pass.cpp
new file mode 100644
index 00000000000000..9d645043a15cab
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_cv.pass.cpp
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator insert(const_iterator position, const value_type& v);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = typename M::iterator;
+  using VT    = typename M::value_type;
+
+  M m;
+  const VT v1(2, 2.5);
+  std::same_as<R> decltype(auto) r = m.insert(m.end(), v1);
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2.5);
+
+  const VT v2(1, 1.5);
+  r = m.insert(m.end(), v2);
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1.5);
+
+  const VT v3(3, 3.5);
+  r = m.insert(m.end(), v3);
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3.5);
+
+  const VT v4(3, 4.5);
+  r = m.insert(m.end(), v4);
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 4);
+  assert(r->first == 3);
+  assert(r->second == 4.5);
+
+  const VT v5(2, 5.5);
+  r = m.insert(m.end(), v5);
+  assert(r == m.begin() + 2);
+  assert(m.size() == 5);
+  assert(r->first == 2);
+  assert(r->second == 5.5);
+
+  const VT v6(2, 6.5);
+  r = m.insert(m.begin(), v6);
+  assert(r == m.begin() + 1);
+  assert(m.size() == 6);
+  assert(r->first == 2);
+  assert(r->second == 6.5);
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(m.begin(), p);
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_iter.pass.cpp
new file mode 100644
index 00000000000000..ae031bd010f76d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_iter.pass.cpp
@@ -0,0 +1,109 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template <class InputIterator>
+//   void insert(InputIterator first, InputIterator last);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// test constraint InputIterator
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); };
+
+using Map  = std::flat_multimap<int, int>;
+using Pair = std::pair<int, int>;
+
+static_assert(CanInsert<Map, Pair*, Pair*>);
+static_assert(CanInsert<Map, cpp17_input_iterator<Pair*>, cpp17_input_iterator<Pair*>>);
+static_assert(!CanInsert<Map, int, int>);
+static_assert(!CanInsert<Map, cpp20_input_iterator<Pair*>, cpp20_input_iterator<Pair*>>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using P = std::pair<int, double>;
+  using M = std::flat_multimap<int, double, std::less<int>, KeyContainer, ValueContainer>;
+
+  P ar1[] = {
+      P(2, 1),
+      P(2, 1.5),
+      P(2, 2),
+      P(1, 1),
+      P(1, 1.5),
+      P(1, 2),
+      P(3, 1),
+      P(3, 1.5),
+      P(3, 2),
+  };
+  P ar2[] = {
+      P(4, 1),
+      P(4, 1.5),
+      P(4, 2),
+      P(1, 1),
+      P(1, 1.5),
+      P(1, 2),
+      P(0, 1),
+      P(0, 1.5),
+      P(0, 2),
+  };
+
+  M m;
+  m.insert(cpp17_input_iterator<P*>(ar1), cpp17_input_iterator<P*>(ar1 + sizeof(ar1) / sizeof(ar1[0])));
+  assert(m.size() == 9);
+  std::vector<P> expected{{1, 1}, {1, 1.5}, {1, 2}, {2, 1}, {2, 1.5}, {2, 2}, {3, 1}, {3, 1.5}, {3, 2}};
+  assert(std::ranges::equal(m, expected));
+
+  m.insert(cpp17_input_iterator<P*>(ar2), cpp17_input_iterator<P*>(ar2 + sizeof(ar2) / sizeof(ar2[0])));
+  assert(m.size() == 18);
+  std::vector<P> expected2{
+      {0, 1},
+      {0, 1.5},
+      {0, 2},
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {2, 1},
+      {2, 1.5},
+      {2, 2},
+      {3, 1},
+      {3, 1.5},
+      {3, 2},
+      {4, 1},
+      {4, 1.5},
+      {4, 2}};
+  assert(std::ranges::equal(m, expected2));
+}
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, const auto& newValues) { m.insert(newValues.begin(), newValues.end()); };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_rv.pass.cpp
new file mode 100644
index 00000000000000..61962f4873aee3
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_iter_rv.pass.cpp
@@ -0,0 +1,103 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator insert(const_iterator position, value_type&&);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "../helpers.h"
+#include "test_macros.h"
+
+template <class Container, class Pair>
+void do_insert_iter_rv_test() {
+  using M = Container;
+  using P = Pair;
+  using R = typename M::iterator;
+  M m;
+  std::same_as<R> decltype(auto) r = m.insert(m.end(), P(2, 2));
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2);
+
+  r = m.insert(m.end(), P(1, 1));
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1);
+
+  r = m.insert(m.end(), P(3, 3));
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3);
+
+  r = m.insert(m.end(), P(3, 4));
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 4);
+  assert(r->first == 3);
+  assert(r->second == 4);
+
+  r = m.insert(m.end(), P(2, 5));
+  assert(r == m.begin() + 2);
+  assert(m.size() == 5);
+  assert(r->first == 2);
+  assert(r->second == 5);
+
+  r = m.insert(m.begin(), P(2, 6));
+  assert(r == m.begin() + 1);
+  assert(m.size() == 6);
+  assert(r->first == 2);
+  assert(r->second == 6);
+}
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using CP    = std::pair<const Key, Value>;
+
+  do_insert_iter_rv_test<M, P>();
+  do_insert_iter_rv_test<M, CP>();
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::vector<int>, std::vector<MoveOnly>>();
+  test<std::deque<int>, std::deque<double>>();
+  test<std::deque<int>, std::deque<MoveOnly>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<MoveOnly>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<MoveOnly, min_allocator<MoveOnly>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(m.begin(), std::move(p));
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range.pass.cpp
new file mode 100644
index 00000000000000..97b8f17d1094f2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range.pass.cpp
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<container-compatible-range<value_type> R>
+//   void insert_range(R&& rg);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "MoveOnly.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// test constraint container-compatible-range
+template <class M, class R>
+concept CanInsertRange = requires(M m, R&& r) { m.insert_range(std::forward<R>(r)); };
+
+using Map = std::flat_multimap<int, double>;
+
+static_assert(CanInsertRange<Map, std::ranges::subrange<std::pair<int, double>*>>);
+static_assert(CanInsertRange<Map, std::ranges::subrange<std::pair<short, double>*>>);
+static_assert(!CanInsertRange<Map, std::ranges::subrange<int*>>);
+static_assert(!CanInsertRange<Map, std::ranges::subrange<double*>>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+
+  {
+    using P                 = std::pair<int, int>;
+    using M                 = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    using It                = forward_iterator<const P*>;
+    M m                     = {{10, 1}, {8, 2}, {5, 3}, {2, 4}, {1, 5}};
+    P ar[]                  = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}};
+    std::ranges::subrange r = {It(ar), It(ar + 6)};
+    static_assert(std::ranges::common_range<decltype(r)>);
+    m.insert_range(r);
+    std::vector<P> expected = {{1, 5}, {1, 2}, {1, 4}, {2, 4}, {3, 1}, {4, 3}, {5, 3}, {5, 5}, {8, 2}, {9, 6}, {10, 1}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    using P                 = std::pair<int, int>;
+    using M                 = std::flat_multimap<Key, Value, std::greater<>, KeyContainer, ValueContainer>;
+    using It                = cpp20_input_iterator<const P*>;
+    M m                     = {{8, 1}, {5, 2}, {3, 3}, {2, 4}};
+    P ar[]                  = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}};
+    std::ranges::subrange r = {It(ar), sentinel_wrapper<It>(It(ar + 6))};
+    static_assert(!std::ranges::common_range<decltype(r)>);
+    m.insert_range(r);
+    std::vector<P> expected = {{9, 6}, {8, 1}, {5, 2}, {5, 5}, {4, 3}, {3, 3}, {3, 1}, {2, 4}, {1, 2}, {1, 4}};
+    assert(std::ranges::equal(m, expected));
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+  {
+    // Items are forwarded correctly from the input range (P2767).
+    std::pair<MoveOnly, MoveOnly> a[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    std::flat_multimap<MoveOnly, MoveOnly> m;
+    m.insert_range(a | std::views::as_rvalue);
+    std::pair<MoveOnly, MoveOnly> expected[] = {{1, 1}, {1, 1}, {3, 3}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // The element type of the range doesn't need to be std::pair (P2767).
+    std::pair<int, int> pa[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    std::deque<std::reference_wrapper<std::pair<int, int>>> a(pa, pa + 5);
+    std::flat_multimap<int, int> m;
+    m.insert_range(a);
+    std::pair<int, int> expected[] = {{1, 1}, {1, 1}, {3, 3}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    auto insert_func = [](auto& m, const auto& newValues) { m.insert_range(newValues); };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range_stability.pass.cpp
new file mode 100644
index 00000000000000..b4c527aa57d22f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_range_stability.pass.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<container-compatible-range<value_type> R>
+//   void insert_range(R&& rg);
+//
+// libc++ uses stable_sort to ensure that flat_multimap's behavior matches multimap's,
+// in terms of which duplicate items are kept.
+// This tests a conforming extension.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <flat_map>
+#include <random>
+#include <ranges>
+#include <map>
+#include <vector>
+#include <utility>
+
+#include "test_macros.h"
+
+// Comparator under which keys equal modulo 256 are equivalent. With 400 random
+// uint16_t keys this produces many equivalent (but distinguishable) elements,
+// making the relative order of duplicates observable.
+struct Mod256 {
+  bool operator()(int x, int y) const { return (x % 256) < (y % 256); }
+};
+
+int main(int, char**) {
+  {
+    // Compare flat_multimap::insert_range against multimap::insert on the same
+    // data: with libc++'s stable_sort-based merge, both should keep equivalent
+    // elements in the same relative order.
+    std::mt19937 randomness;
+    std::pair<uint16_t, uint16_t> pairs[400];
+    for (int i = 0; i < 400; ++i) {
+      uint16_t r = randomness();
+      pairs[i]   = {r, r};
+    }
+
+    std::multimap<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200);
+    std::flat_multimap<uint16_t, uint16_t, Mod256> fm(std::sorted_equivalent, m.begin(), m.end());
+    assert(std::ranges::equal(fm, m));
+
+    fm.insert_range(std::views::counted(pairs + 200, 200));
+    m.insert(pairs + 200, pairs + 400);
+    assert(fm.size() == m.size());
+    // Matching element order is a libc++ conforming extension, hence LIBCPP_ASSERT.
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+
+  {
+    // Inserting two values with equivalent keys keeps them in insertion order.
+    std::vector<std::pair<int, int>> v{{1, 2}, {1, 3}};
+    std::flat_multimap<int, int> m;
+    m.insert_range(v);
+    assert(m.size() == 2);
+    LIBCPP_ASSERT(m.begin()->second == 2);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_rv.pass.cpp
new file mode 100644
index 00000000000000..573150248ca485
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_rv.pass.cpp
@@ -0,0 +1,116 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// iterator insert(value_type&& v);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// Exercise Container::insert(value_type&&) via pair type Pair (either
+// pair<Key, Value> or pair<const Key, Value>), checking the returned iterator
+// position and the container size after each insertion.
+template <class Container, class Pair>
+void do_insert_rv_test() {
+  using M = Container;
+  using P = Pair;
+  using R = typename M::iterator;
+  M m;
+  std::same_as<R> decltype(auto) r = m.insert(P(2, 2));
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2);
+
+  r = m.insert(P(1, 1));
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1);
+
+  r = m.insert(P(3, 3));
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3);
+
+  // Inserting an equivalent key again succeeds (multimap semantics) and the
+  // size grows, unlike flat_map.
+  r = m.insert(P(3, 3));
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 4);
+  assert(r->first == 3);
+  assert(r->second == 3);
+}
+
+// Run the rvalue-insert test for a given key/mapped container pair, with both
+// pair<Key, Value> and pair<const Key, Value> as the inserted pair type.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  using P  = std::pair<Key, Value>;
+  using CP = std::pair<const Key, Value>;
+
+  do_insert_rv_test<M, P>();
+  do_insert_rv_test<M, CP>();
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<MoveOnly>>();
+  test<std::deque<int>, std::vector<MoveOnly>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<MoveOnly>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<MoveOnly, min_allocator<MoveOnly>>>();
+
+  {
+    // Same sequence as do_insert_rv_test, but with braced-init-list arguments
+    // and a MoveOnly mapped type, so insert must not copy.
+    using M = std::flat_multimap<int, MoveOnly>;
+    using R = M::iterator;
+    M m;
+    R r = m.insert({2, MoveOnly(2)});
+    assert(r == m.begin());
+    assert(m.size() == 1);
+    assert(r->first == 2);
+    assert(r->second == 2);
+
+    r = m.insert({1, MoveOnly(1)});
+    assert(r == m.begin());
+    assert(m.size() == 2);
+    assert(r->first == 1);
+    assert(r->second == 1);
+
+    r = m.insert({3, MoveOnly(3)});
+    assert(r == std::ranges::prev(m.end()));
+    assert(m.size() == 3);
+    assert(r->first == 3);
+    assert(r->second == 3);
+
+    // Duplicate key: insertion still succeeds (multimap semantics).
+    r = m.insert({3, MoveOnly(3)});
+    assert(r == std::ranges::prev(m.end()));
+    assert(m.size() == 4);
+    assert(r->first == 3);
+    assert(r->second == 3);
+  }
+  {
+    // insert(value_type&&) must provide the container-adaptor exception guarantee.
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(std::move(p));
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_initializer_list.pass.cpp
new file mode 100644
index 00000000000000..334dff0a0d2f6f
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_initializer_list.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// void insert(sorted_equivalent_t, initializer_list<value_type> il);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Insert a sorted_equivalent initializer_list into a non-empty flat_multimap
+// and check the merged contents; equivalent keys from the incoming list keep
+// their relative order after the existing equivalent elements.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  using V = std::pair<const Key, Value>;
+  M m     = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}};
+  // The list below is sorted by key (duplicates allowed), as sorted_equivalent requires.
+  m.insert(std::sorted_equivalent,
+           {
+               {0, 1},
+               {1, 2},
+               {1, 3},
+               {2, 1},
+               {2, 4},
+               {4, 1},
+           });
+  assert(m.size() == 12);
+  V expected[] = {{0, 1}, {1, 1}, {1, 1.5}, {1, 2}, {1, 2}, {1, 3}, {2, 1}, {2, 4}, {3, 1}, {3, 1.5}, {3, 2}, {4, 1}};
+  assert(std::ranges::equal(m, expected));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    // insert(sorted_equivalent, il) must provide the container-adaptor
+    // exception guarantee; a one-element list is trivially sorted.
+    auto insert_func = [](auto& m, const auto& newValues) {
+      using FlatMap                        = std::decay_t<decltype(m)>;
+      using value_type                     = typename FlatMap::value_type;
+      std::initializer_list<value_type> il = {{newValues[0].first, newValues[0].second}};
+      m.insert(std::sorted_equivalent, il);
+    };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_iter_iter.pass.cpp
new file mode 100644
index 00000000000000..37808470a2cf77
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_sorted_iter_iter.pass.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template <class InputIterator>
+//   void insert(sorted_equivalent_t, InputIterator first, InputIterator last);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// test constraint InputIterator
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); };
+
+using Map  = std::flat_multimap<int, int>;
+using Pair = std::pair<int, int>;
+
+static_assert(CanInsert<Map, std::sorted_equivalent_t, Pair*, Pair*>);
+static_assert(CanInsert<Map, std::sorted_equivalent_t, cpp17_input_iterator<Pair*>, cpp17_input_iterator<Pair*>>);
+static_assert(!CanInsert<Map, std::sorted_equivalent_t, int, int>);
+static_assert(!CanInsert<Map, std::sorted_equivalent_t, cpp20_input_iterator<Pair*>, cpp20_input_iterator<Pair*>>);
+
+// Insert two sorted-by-key (with duplicates) sequences through cpp17 input
+// iterators and check the merged contents after each call; equivalent keys
+// keep existing elements before newly inserted ones.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+
+  // Sorted by key; mapped values deliberately not sorted, to observe ordering.
+  P ar1[] = {
+      P(1, 1),
+      P(1, 0),
+      P(2, 1),
+      P(2, 3),
+      P(3, 1),
+  };
+
+  P ar2[] = {
+      P(0, 1),
+      P(2, 2),
+      P(2, 5),
+      P(4, 1),
+      P(4, 4),
+  };
+
+  M m;
+  m.insert(std::sorted_equivalent,
+           cpp17_input_iterator<P*>(ar1),
+           cpp17_input_iterator<P*>(ar1 + sizeof(ar1) / sizeof(ar1[0])));
+  assert(m.size() == 5);
+  P expected[] = {{1, 1}, {1, 0}, {2, 1}, {2, 3}, {3, 1}};
+  assert(std::ranges::equal(m, expected));
+
+  m.insert(std::sorted_equivalent,
+           cpp17_input_iterator<P*>(ar2),
+           cpp17_input_iterator<P*>(ar2 + sizeof(ar2) / sizeof(ar2[0])));
+  assert(m.size() == 10);
+  P expected2[] = {{0, 1}, {1, 1}, {1, 0}, {2, 1}, {2, 3}, {2, 2}, {2, 5}, {3, 1}, {4, 1}, {4, 4}};
+  assert(std::ranges::equal(m, expected2));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    // insert(sorted_equivalent, first, last) must provide the container-adaptor
+    // exception guarantee.
+    auto insert_func = [](auto& m, const auto& newValues) {
+      m.insert(std::sorted_equivalent, newValues.begin(), newValues.end());
+    };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_transparent.pass.cpp
new file mode 100644
index 00000000000000..33ca4d4e30469c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/insert_transparent.pass.cpp
@@ -0,0 +1,135 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class P> iterator insert(P&& x);
+// template<class P> iterator insert(const_iterator hint, P&& x);
+
+#include <algorithm>
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <tuple>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// Constraints: is_constructible_v<pair<key_type, mapped_type>, P> is true.
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); };
+
+using Map  = std::flat_multimap<int, double>;
+using Iter = Map::const_iterator;
+
+static_assert(CanInsert<Map, std::pair<short, double>&&>);
+static_assert(CanInsert<Map, Iter, std::pair<short, double>&&>);
+static_assert(CanInsert<Map, std::tuple<short, double>&&>);
+static_assert(CanInsert<Map, Iter, std::tuple<short, double>&&>);
+static_assert(!CanInsert<Map, int>);
+static_assert(!CanInsert<Map, Iter, int>);
+
+static int expensive_comparisons = 0;
+static int cheap_comparisons     = 0;
+
+// Key type that counts comparisons: homogeneous (CompareCounter vs
+// CompareCounter) comparisons bump expensive_comparisons; heterogeneous
+// (CompareCounter vs int) comparisons bump cheap_comparisons. The test below
+// uses the counters to show that insert(P&&) converts to value_type eagerly
+// and therefore only performs "expensive" comparisons.
+struct CompareCounter {
+  int i_ = 0;
+  CompareCounter(int i) : i_(i) {}
+  friend auto operator<=>(const CompareCounter& x, const CompareCounter& y) {
+    expensive_comparisons += 1;
+    return x.i_ <=> y.i_;
+  }
+  bool operator==(const CompareCounter&) const = default;
+  friend auto operator<=>(const CompareCounter& x, int y) {
+    cheap_comparisons += 1;
+    return x.i_ <=> y;
+  }
+};
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  {
+    // insert(P&&)
+    //   Unlike flat_set, here we can't use key_compare to compare value_type versus P,
+    //   so we must eagerly convert to value_type.
+    M m                                                 = {{1, 1}, {2, 2}, {3, 1}, {3, 4}, {4, 4}, {5, 5}};
+    expensive_comparisons                               = 0;
+    cheap_comparisons                                   = 0;
+    std::same_as<typename M::iterator> decltype(auto) r = m.insert(std::make_pair(3, 3)); // conversion happens first
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    // Hint-less insert of an equivalent key lands after the existing {3, *} run.
+    assert(r == m.begin() + 4);
+
+    std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 1}, {3, 4}, {3, 3}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // insert(const_iterator, P&&)
+    M m                                        = {{1, 1}, {2, 2}, {3, 1}, {3, 4}, {4, 4}, {5, 5}};
+    expensive_comparisons                      = 0;
+    cheap_comparisons                          = 0;
+    std::same_as<typename M::iterator> auto it = m.insert(m.begin(), std::make_pair(3, 3));
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    // With a hint at begin(), the new element goes before the existing {3, *} run.
+    assert(it == m.begin() + 2);
+    std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {3, 1}, {3, 4}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<CompareCounter>, std::vector<double>>();
+  test<std::deque<CompareCounter>, std::vector<double>>();
+  test<MinSequenceContainer<CompareCounter>, MinSequenceContainer<double>>();
+  test<std::vector<CompareCounter, min_allocator<CompareCounter>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    // no ambiguity between insert(pos, P&&) and insert(first, last)
+    using M = std::flat_multimap<int, int>;
+    struct Evil {
+      // Convertible to both value_type and const_iterator, so overload
+      // resolution must still pick a unique candidate for each call below.
+      operator M::value_type() const;
+      operator M::const_iterator() const;
+    };
+    std::flat_multimap<int, int> m;
+    ASSERT_SAME_TYPE(decltype(m.insert(Evil())), M::iterator);
+    ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), Evil())), M::iterator);
+    ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), m.end())), void);
+  }
+  {
+    // insert(P&&) must provide the container-adaptor exception guarantee.
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using tuple_type = std::tuple<typename FlatMap::key_type, typename FlatMap::mapped_type>;
+      tuple_type t(key_arg, value_arg);
+      m.insert(t);
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+  {
+    // insert(hint, P&&) must provide the same guarantee.
+    auto insert_func_iter = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using tuple_type = std::tuple<typename FlatMap::key_type, typename FlatMap::mapped_type>;
+      tuple_type t(key_arg, value_arg);
+      m.insert(m.begin(), t);
+    };
+    test_emplace_exception_guarantee(insert_func_iter);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/replace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/replace.pass.cpp
new file mode 100644
index 00000000000000..86fbaff468ab6d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/replace.pass.cpp
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// void replace(key_container_type&& key_cont, mapped_container_type&& mapped_cont);
+
+#include <algorithm>
+#include <deque>
+#include <concepts>
+#include <flat_map>
+#include <functional>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class T, class... Args>
+concept CanReplace = requires(T t, Args&&... args) { t.replace(std::forward<Args>(args)...); };
+
+using Map = std::flat_multimap<int, int>;
+static_assert(CanReplace<Map, std::vector<int>, std::vector<int>>);
+static_assert(!CanReplace<Map, const std::vector<int>&, std::vector<int>>);
+static_assert(!CanReplace<Map, std::vector<int>, const std::vector<int>&>);
+static_assert(!CanReplace<Map, const std::vector<int>&, const std::vector<int>&>);
+
+// replace() discards the current contents and adopts the given (already
+// sorted) key and mapped containers; verify the adaptor reflects exactly the
+// new containers afterwards.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m                       = M({1, 1, 3}, {4, 5, 6});
+  KeyContainer new_keys     = {7, 7};
+  ValueContainer new_values = {9, 10};
+  auto expected_keys        = new_keys;
+  auto expected_values      = new_values;
+  m.replace(std::move(new_keys), std::move(new_values));
+  assert(m.size() == 2);
+  assert(std::ranges::equal(m.keys(), expected_keys));
+  assert(std::ranges::equal(m.values(), expected_values));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    // If moving one of the incoming containers throws, the invariants must
+    // still hold (keys and values stay the same size, keys sorted).
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>;
+    using M              = std::flat_multimap<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+
+    M m;
+    m.emplace(1, 1);
+    m.emplace(2, 2);
+    try {
+      KeyContainer new_keys{3, 4};
+      ValueContainer new_values{5, 6};
+      m.replace(std::move(new_keys), std::move(new_values));
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we clear the map
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+#endif
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_exception.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_exception.pass.cpp
new file mode 100644
index 00000000000000..a1252f301309ab
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_exception.pass.cpp
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// `check_assertion.h` requires Unix headers and regex support.
+// REQUIRES: has-unix-headers
+// UNSUPPORTED: no-localization
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// class flat_multimap
+
+// void swap(flat_multimap& y) noexcept;
+// friend void swap(flat_multimap& x, flat_multimap& y) noexcept
+
+// Test that std::terminate is called if any exception is thrown during swap
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../helpers.h"
+#include "check_assertion.h"
+
+// swap is declared noexcept, so if swapping either underlying container
+// throws, the program must call std::terminate rather than propagate.
+// swap_function is either the member swap or the ADL free swap.
+template <class F>
+void test_swap_exception_guarantee([[maybe_unused]] F&& swap_function) {
+  {
+    // key swap throws
+    using KeyContainer   = ThrowOnMoveContainer<int>;
+    using ValueContainer = std::vector<int>;
+    using M              = std::flat_multimap<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    M m1, m2;
+    m1.emplace(1, 1);
+    m1.emplace(1, 2);
+    m2.emplace(3, 3);
+    m2.emplace(3, 4);
+    // swap is noexcept
+    EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); });
+  }
+
+  {
+    // value swap throws
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>;
+    using M              = std::flat_multimap<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    M m1, m2;
+    m1.emplace(1, 1);
+    m1.emplace(1, 2);
+    m2.emplace(3, 3);
+    m2.emplace(3, 4);
+
+    // swap is noexcept
+    EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); });
+  }
+}
+
+int main(int, char**) {
+  {
+    // ADL free swap
+    auto swap_func = [](auto& m1, auto& m2) { swap(m1, m2); };
+    test_swap_exception_guarantee(swap_func);
+  }
+
+  {
+    // member swap
+    auto swap_func = [](auto& m1, auto& m2) { m1.swap(m2); };
+    test_swap_exception_guarantee(swap_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_free.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_free.pass.cpp
new file mode 100644
index 00000000000000..f96155d714dc9a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_free.pass.cpp
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// friend void swap(flat_multimap& x, flat_multimap& y) noexcept
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// test noexcept
+
+template <class T>
+concept NoExceptAdlSwap = requires(T t1, T t2) {
+  { swap(t1, t2) } noexcept;
+};
+
+static_assert(NoExceptAdlSwap<std::flat_multimap<int, int>>);
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert(NoExceptAdlSwap<
+              std::flat_multimap<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+// Free-function swap: exchange two maps in all four emptiness combinations
+// and verify each ends up equal to the other's saved copy.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<const Key, Value>;
+
+  {
+    // empty <-> empty
+    M m1;
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // empty <-> non-empty (with duplicate keys)
+    V ar2[] = {V(5, 5), V(5, 6), V(5, 7), V(8, 8), V(9, 9), V(10, 10), V(10, 11), V(10, 12)};
+    M m1;
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // non-empty <-> empty
+    V ar1[] = {V(1, 1), V(1, 2), V(3, 3), V(4, 4)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // non-empty <-> non-empty
+    V ar1[] = {V(1, 1), V(1, 2), V(3, 3), V(4, 4)};
+    V ar2[] = {V(5, 5), V(5, 6), V(5, 7), V(8, 8), V(9, 9), V(10, 10), V(10, 11), V(10, 12)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+}
+
+int main(int, char**) {
+  // Run the free-swap test over several key/mapped container combinations.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_member.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_member.pass.cpp
new file mode 100644
index 00000000000000..ab7be3b8ac22e7
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.modifiers/swap_member.pass.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// void swap(flat_multimap& y) noexcept;
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// test noexcept
+
+template <class T>
+concept NoExceptMemberSwap = requires(T t1, T t2) {
+  { t1.swap(t2) } noexcept;
+};
+
+static_assert(NoExceptMemberSwap<std::flat_multimap<int, int>>);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert(NoExceptMemberSwap<
+              std::flat_multimap<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+// Member swap: exchange two maps in all four emptiness combinations and
+// verify each ends up equal to the other's saved copy.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<const Key, Value>;
+  {
+    // empty <-> empty
+    M m1;
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // empty <-> non-empty (with duplicate keys)
+    V ar2[] = {V(5, 5), V(5, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(10, 11), V(12, 12)};
+    M m1;
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // non-empty <-> empty
+    V ar1[] = {V(1, 1), V(1, 2), V(3, 3), V(4, 4)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  {
+    // non-empty <-> non-empty
+    V ar1[] = {V(1, 1), V(1, 2), V(3, 3), V(4, 4)};
+    V ar2[] = {V(5, 5), V(5, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(10, 11), V(12, 12)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+}
+
+int main(int, char**) {
+  // Run the member-swap test over several key/mapped container combinations.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/comp.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/comp.pass.cpp
new file mode 100644
index 00000000000000..47140132c6e476
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/comp.pass.cpp
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// key_compare key_comp() const;
+// value_compare value_comp() const;
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "test_macros.h"
+
+// Checks key_comp()/value_comp() return types and behavior for four
+// comparator flavours: the default std::less<int>, a stateful comparator
+// held in std::function, a transparent std::less<>, and a capturing lambda
+// behind std::function (showing value_comp() keeps its own copy).
+int main(int, char**) {
+  { // default comparator: std::less<int>
+    using M    = std::flat_multimap<int, char>;
+    using Comp = std::less<int>; // the default
+    M m        = {};
+    ASSERT_SAME_TYPE(M::key_compare, Comp);
+    static_assert(!std::is_same_v<M::value_compare, Comp>);
+    ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp);
+    ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare);
+    Comp kc = m.key_comp();
+    assert(kc(1, 2));
+    assert(!kc(2, 1));
+    auto vc = m.value_comp();
+    // value_compare compares pairs by key only.
+    ASSERT_SAME_TYPE(decltype(vc(std::make_pair(1, 2), std::make_pair(1, 2))), bool);
+    assert(vc({1, '2'}, {2, '1'}));
+    assert(!vc({2, '1'}, {1, '2'}));
+  }
+  { // stateful comparator held in std::function, ordering reversed
+    using Comp = std::function<bool(int, int)>;
+    using M    = std::flat_multimap<int, int, Comp>;
+    Comp comp  = std::greater<int>();
+    M m({}, comp);
+    ASSERT_SAME_TYPE(M::key_compare, Comp);
+    ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp);
+    ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare);
+    Comp kc = m.key_comp();
+    assert(!kc(1, 2));
+    assert(kc(2, 1));
+    auto vc = m.value_comp();
+    auto a  = std::make_pair(1, 2);
+    ASSERT_SAME_TYPE(decltype(vc(a, a)), bool);
+    static_assert(!noexcept(vc(a, a))); // std::function's call operator may throw
+    assert(!vc({1, 2}, {2, 1}));
+    assert(vc({2, 1}, {1, 2}));
+  }
+  { // transparent comparator std::less<>
+    using Comp = std::less<>;
+    using M    = std::flat_multimap<int, int, Comp>;
+    M m        = {};
+    ASSERT_SAME_TYPE(M::key_compare, Comp);
+    ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp);
+    ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare);
+    Comp kc = m.key_comp();
+    assert(kc(1, 2));
+    assert(!kc(2, 1));
+    auto vc = m.value_comp();
+    auto a  = std::make_pair(1, 2);
+    ASSERT_SAME_TYPE(decltype(vc(a, a)), bool);
+    assert(vc({1, 2}, {2, 1}));
+    assert(!vc({2, 1}, {1, 2}));
+  }
+  { // value_comp() copies the comparator; the copy outlives reassignment
+    using Comp = std::function<bool(const std::vector<int>&, const std::vector<int>&)>;
+    using M    = std::flat_multimap<std::vector<int>, int, Comp>;
+    Comp comp  = [i = 1](const auto& x, const auto& y) { return x[i] < y[i]; };
+    M m({}, comp);
+    auto vc = m.value_comp();
+    static_assert(sizeof(vc) >= sizeof(Comp)); // vc stores a full comparator copy
+    comp = nullptr;
+    m    = M({}, nullptr);
+    assert(m.key_comp() == nullptr);
+    // At this point, m.key_comp() is disengaged.
+    // But the std::function captured by copy inside `vc` remains valid.
+    auto a = std::make_pair(std::vector<int>{2, 1, 4}, 42);
+    auto b = std::make_pair(std::vector<int>{1, 2, 3}, 42);
+    auto c = std::make_pair(std::vector<int>{0, 3, 2}, 42);
+    assert(vc(a, b)); // compares element [1]: 1 < 2
+    assert(vc(b, c));
+    assert(!vc(b, a));
+    assert(!vc(c, b));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/keys_values.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/keys_values.pass.cpp
new file mode 100644
index 00000000000000..c7c674c034bcaa
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.observers/keys_values.pass.cpp
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// const key_container_type& keys() const noexcept
+// const mapped_container_type& values() const noexcept
+
+#include <algorithm>
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+#include <deque>
+#include <string>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+// keys() and values() must return const references to the underlying
+// containers, be noexcept, and expose the key-sorted sequences; the two
+// entries sharing key 2 keep their relative order ('b' before 'e'), as the
+// expected arrays below assert.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  const M m                                                 = {{4, 'a'}, {2, 'b'}, {2, 'e'}, {3, 'c'}};
+  std::same_as<const KeyContainer&> decltype(auto) keys     = m.keys();
+  std::same_as<const ValueContainer&> decltype(auto) values = m.values();
+
+  // noexcept
+  static_assert(noexcept(m.keys()));
+  static_assert(noexcept(m.values()));
+
+  auto expected_keys   = {2, 2, 3, 4};
+  auto expected_values = {'b', 'e', 'c', 'a'};
+  assert(std::ranges::equal(keys, expected_keys));
+  assert(std::ranges::equal(values, expected_values));
+}
+
+// Instantiate with several underlying container combinations.
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains.pass.cpp
new file mode 100644
index 00000000000000..b3ea0b65a3d935
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// bool contains(const key_type& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// contains(key) must report presence regardless of multiplicity (inputs
+// hold duplicate keys), work through both non-const and const overloads,
+// and return false for everything after clear(). Tested with an ascending
+// (transparent std::less<>) and a descending (std::greater<int>) comparator.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  { // ascending order, duplicates of 2 and 8
+    using M = std::flat_multimap<Key, Value, std::less<>, KeyContainer, ValueContainer>;
+    M m     = {{1, 1}, {2, 2}, {2, 3}, {4, 4}, {5, 5}, {8, 1}, {8, 2}, {8, 8}};
+    assert(!m.contains(0));
+    assert(m.contains(1));
+    assert(m.contains(2));
+    assert(!m.contains(3));
+    assert(m.contains(4));
+    assert(m.contains(5));
+    assert(!m.contains(6));
+    assert(!m.contains(7));
+    assert(std::as_const(m).contains(8));
+    assert(!std::as_const(m).contains(9));
+    m.clear();
+    assert(!m.contains(1));
+  }
+  { // descending order, duplicates of 2 and 5 and 8
+    using M = std::flat_multimap<Key, Value, std::greater<int>, KeyContainer, ValueContainer>;
+    M m     = {{1, 0}, {2, 0}, {4, 0}, {2, 1}, {5, 1}, {5, 2}, {5, 0}, {8, 0}};
+    assert(!m.contains(0));
+    assert(m.contains(1));
+    assert(m.contains(2));
+    assert(!m.contains(3));
+    assert(m.contains(4));
+    assert(m.contains(5));
+    assert(!m.contains(6));
+    assert(!m.contains(7));
+    assert(std::as_const(m).contains(8));
+    assert(!std::as_const(m).contains(9));
+    m.clear();
+    assert(!m.contains(1));
+  }
+}
+
+// Instantiate with several underlying container combinations.
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains_transparent.pass.cpp
new file mode 100644
index 00000000000000..8a66ec63768d7d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/contains_transparent.pass.cpp
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> bool contains(const K& x) const;
+
+#include <cassert>
+#include <flat_map>
+#include <string>
+#include <utility>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+// CanContains<M> is satisfied iff m.contains(k) compiles for a non-key
+// argument; the template overload must be available (on const and non-const
+// maps alike) exactly when the comparator is transparent.
+template <class M>
+concept CanContains     = requires(M m, Transparent<int> k) { m.contains(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanContains<TransparentMap>);
+static_assert(CanContains<const TransparentMap>);
+static_assert(!CanContains<NonTransparentMap>);
+static_assert(!CanContains<const NonTransparentMap>);
+
+// Heterogeneous contains(): look up with Transparent<std::string> wrappers
+// instead of the key type. Exact stored keys (including duplicated ones)
+// must be found; prefixes of stored keys ("al", "g", "") must not match.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m = {{"alpha", 1}, {"beta", 2}, {"beta", 0}, {"epsilon", 3}, {"eta", 4}, {"eta", 1}, {"gamma", 5}};
+  ASSERT_SAME_TYPE(decltype(m.contains(Transparent<std::string>{"abc"})), bool);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).contains(Transparent<std::string>{"b"})), bool);
+  assert(m.contains(Transparent<std::string>{"alpha"}) == true);
+  assert(m.contains(Transparent<std::string>{"beta"}) == true);
+  assert(m.contains(Transparent<std::string>{"epsilon"}) == true);
+  assert(m.contains(Transparent<std::string>{"eta"}) == true);
+  assert(m.contains(Transparent<std::string>{"gamma"}) == true);
+  assert(m.contains(Transparent<std::string>{"al"}) == false);
+  assert(m.contains(Transparent<std::string>{""}) == false);
+  assert(m.contains(Transparent<std::string>{"g"}) == false);
+}
+
+// Instantiate with several container combinations, then verify that the
+// transparent overload really invokes the transparent comparator (i.e. no
+// conversion of the argument to the key type happens behind the scenes).
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  { // the comparator's transparent path must be exercised by contains()
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {1, 2}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto b = m.contains(Transparent<int>{3});
+    assert(b);
+    assert(transparent_used);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count.pass.cpp
new file mode 100644
index 00000000000000..59b88428cde3c1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// size_type count(const key_type& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// count(key) must return the number of equivalent elements (size_t),
+// including multiplicities > 1, through both const and non-const paths.
+// Tested with an ascending (transparent std::less<>) and a descending
+// (std::greater<int>) comparator.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+
+  { // ascending: keys sort to 1,2,2,4,4,4,4,5,8
+    using M = std::flat_multimap<Key, Value, std::less<>, KeyContainer, ValueContainer>;
+    M m     = {{1, 1}, {2, 2}, {2, 2}, {4, 4}, {4, 1}, {4, 3}, {4, 4}, {5, 5}, {8, 8}};
+    ASSERT_SAME_TYPE(decltype(m.count(0)), size_t);
+    assert(m.count(0) == 0);
+    assert(m.count(1) == 1);
+    assert(m.count(2) == 2);
+    assert(m.count(3) == 0);
+    assert(m.count(4) == 4);
+    assert(m.count(5) == 1);
+    assert(m.count(6) == 0);
+    assert(m.count(7) == 0);
+    assert(std::as_const(m).count(8) == 1);
+    assert(std::as_const(m).count(9) == 0);
+  }
+  { // descending: three 1s and two 8s in the input
+    using M = std::flat_multimap<Key, Value, std::greater<int>, KeyContainer, ValueContainer>;
+    M m     = {{1, 0}, {2, 0}, {4, 0}, {1, 0}, {1, 2}, {8, 1}, {5, 0}, {8, 0}};
+    ASSERT_SAME_TYPE(decltype(m.count(0)), size_t);
+    assert(m.count(0) == 0);
+    assert(m.count(1) == 3);
+    assert(m.count(2) == 1);
+    assert(m.count(3) == 0);
+    assert(m.count(4) == 1);
+    assert(m.count(5) == 1);
+    assert(m.count(6) == 0);
+    assert(m.count(7) == 0);
+    assert(std::as_const(m).count(8) == 2);
+    assert(std::as_const(m).count(9) == 0);
+  }
+}
+
+// Instantiate with several underlying container combinations.
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count_transparent.pass.cpp
new file mode 100644
index 00000000000000..41f71065b2f750
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/count_transparent.pass.cpp
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> size_type count(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+// CanCount<M> is satisfied iff m.count(k) compiles for a non-key argument;
+// the template overload must exist (for const and non-const maps) exactly
+// when the comparator is transparent.
+template <class M>
+concept CanCount        = requires(M m, Transparent<int> k) { m.count(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanCount<TransparentMap>);
+static_assert(CanCount<const TransparentMap>);
+static_assert(!CanCount<NonTransparentMap>);
+static_assert(!CanCount<const NonTransparentMap>);
+
+// Heterogeneous count(): look up with Transparent<std::string> wrappers.
+// Multiplicities must be reported exactly (beta x2, eta x3, gamma x2) and
+// prefixes of stored keys must count as zero.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m = {{"alpha", 1},
+         {"beta", 2},
+         {"beta", 2},
+         {"epsilon", 3},
+         {"eta", 4},
+         {"eta", 1},
+         {"eta", 5},
+         {"gamma", 6},
+         {"gamma", 5}};
+  ASSERT_SAME_TYPE(decltype(m.count(Transparent<std::string>{"abc"})), typename M::size_type);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).count(Transparent<std::string>{"b"})), typename M::size_type);
+  assert(m.count(Transparent<std::string>{"alpha"}) == 1);
+  assert(m.count(Transparent<std::string>{"beta"}) == 2);
+  assert(m.count(Transparent<std::string>{"epsilon"}) == 1);
+  assert(m.count(Transparent<std::string>{"eta"}) == 3);
+  assert(m.count(Transparent<std::string>{"gamma"}) == 2);
+  assert(m.count(Transparent<std::string>{"al"}) == 0);
+  assert(m.count(Transparent<std::string>{""}) == 0);
+  assert(m.count(Transparent<std::string>{"g"}) == 0);
+}
+
+// Instantiate with several container combinations, then verify count()
+// actually dispatches through the transparent comparator.
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  { // the comparator's transparent path must be exercised by count()
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(
+        std::sorted_equivalent, {{1, 1}, {2, 2}, {2, 2}, {3, 3}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto n = m.count(Transparent<int>{3});
+    assert(n == 2);
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range.pass.cpp
new file mode 100644
index 00000000000000..ac369b77a7f3d3
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range.pass.cpp
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// pair<iterator,iterator>             equal_range(const key_type& k);
+// pair<const_iterator,const_iterator> equal_range(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// equal_range(key) must return the half-open [first, last) run of elements
+// equivalent to the key (empty, positioned at the insertion point, when the
+// key is absent). Offsets below are hand-computed from the sorted inputs,
+// for both ascending and descending comparators.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  { // ascending: keys sort to 1,1,1,2,4,5,5,8,8
+    using M  = std::flat_multimap<Key, Value, std::less<>, KeyContainer, ValueContainer>;
+    using R  = std::pair<typename M::iterator, typename M::iterator>;
+    using CR = std::pair<typename M::const_iterator, typename M::const_iterator>;
+    M m      = {{1, 'a'}, {1, 'a'}, {1, 'A'}, {2, 'b'}, {4, 'd'}, {5, 'E'}, {5, 'e'}, {8, 'h'}, {8, 'z'}};
+    ASSERT_SAME_TYPE(decltype(m.equal_range(0)), R);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(0)), CR);
+    auto begin = m.begin();
+    assert(m.equal_range(0) == std::pair(begin, begin));
+    assert(m.equal_range(1) == std::pair(begin, begin + 3));
+    assert(m.equal_range(2) == std::pair(begin + 3, begin + 4));
+    assert(m.equal_range(3) == std::pair(begin + 4, begin + 4));
+    assert(m.equal_range(4) == std::pair(begin + 4, begin + 5));
+    assert(m.equal_range(5) == std::pair(begin + 5, begin + 7));
+    assert(m.equal_range(6) == std::pair(begin + 7, begin + 7));
+    assert(m.equal_range(7) == std::pair(begin + 7, begin + 7));
+    assert(std::as_const(m).equal_range(8) == std::pair(m.cbegin() + 7, m.cbegin() + 9));
+    assert(std::as_const(m).equal_range(9) == std::pair(m.cbegin() + 9, m.cbegin() + 9));
+  }
+
+  { // descending: keys sort to 8,8,5,4,4,4,4,2,2,2,1
+    using M  = std::flat_multimap<Key, Value, std::greater<int>, KeyContainer, ValueContainer>;
+    using R  = std::pair<typename M::iterator, typename M::iterator>;
+    using CR = std::pair<typename M::const_iterator, typename M::const_iterator>;
+    M m      = {
+        {1, 'a'}, {2, 'b'}, {2, 'b'}, {2, 'c'}, {4, 'a'}, {4, 'b'}, {4, 'c'}, {4, 'd'}, {5, 'e'}, {8, 'a'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.equal_range(0)), R);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(0)), CR);
+    auto begin = m.begin();
+    assert(m.equal_range(0) == std::pair(begin + 11, begin + 11));
+    assert(m.equal_range(1) == std::pair(begin + 10, begin + 11));
+    assert(m.equal_range(2) == std::pair(begin + 7, begin + 10));
+    assert(m.equal_range(3) == std::pair(begin + 7, begin + 7));
+    assert(m.equal_range(4) == std::pair(begin + 3, begin + 7));
+    assert(m.equal_range(5) == std::pair(begin + 2, begin + 3));
+    assert(m.equal_range(6) == std::pair(begin + 2, begin + 2));
+    assert(m.equal_range(7) == std::pair(begin + 2, begin + 2));
+    assert(std::as_const(m).equal_range(8) == std::pair(m.cbegin(), m.cbegin() + 2));
+    assert(std::as_const(m).equal_range(9) == std::pair(m.cbegin(), m.cbegin()));
+  }
+}
+
+// Instantiate with several underlying container combinations.
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range_transparent.pass.cpp
new file mode 100644
index 00000000000000..3666492bb921fb
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/equal_range_transparent.pass.cpp
@@ -0,0 +1,110 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> pair<iterator,iterator>             equal_range(const K& x);
+// template<class K> pair<const_iterator,const_iterator> equal_range(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+// CanEqualRange<M> is satisfied iff m.equal_range(k) compiles for a non-key
+// argument; the template overload must exist (for const and non-const maps)
+// exactly when the comparator is transparent.
+template <class M>
+concept CanEqualRange   = requires(M m, Transparent<int> k) { m.equal_range(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanEqualRange<TransparentMap>);
+static_assert(CanEqualRange<const TransparentMap>);
+static_assert(!CanEqualRange<NonTransparentMap>);
+static_assert(!CanEqualRange<const NonTransparentMap>);
+
+// Heterogeneous equal_range(): look up with Transparent<std::string>
+// wrappers on const and non-const maps. Found keys yield the full run of
+// equivalent elements (values in stored order); missing keys yield an empty
+// range at the would-be insertion point.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  using R        = std::pair<typename M::iterator, typename M::iterator>;
+  using CR       = std::pair<typename M::const_iterator, typename M::const_iterator>;
+  M m            = {{"alpha", 1},
+                    {"alpha", 1},
+                    {"alpha", 3},
+                    {"beta", 2},
+                    {"epsilon", 3},
+                    {"epsilon", 0},
+                    {"eta", 4},
+                    {"gamma", 5},
+                    {"gamma", 1}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.equal_range(Transparent<std::string>{"abc"})), R);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(Transparent<std::string>{"b"})), CR);
+
+  // The returned range must be exactly `expected_values` paired with
+  // `expected_key`, in stored order.
+  auto test_found = [&](auto&& map, const auto& expected_key, std::initializer_list<Value> expected_values) {
+    auto [first, last] = map.equal_range(Transparent<std::string>{expected_key});
+    auto expected_range =
+        expected_values | std::views::transform([&](auto&& val) { return std::pair(expected_key, val); });
+    assert(std::ranges::equal(std::ranges::subrange(first, last), expected_range));
+  };
+
+  // A missing key must yield an empty range at the given offset from begin().
+  auto test_not_found = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto [first, last] = map.equal_range(Transparent<std::string>{expected_key});
+    assert(first == last);
+    assert(first - m.begin() == expected_offset);
+  };
+
+  test_found(m, "alpha", {1, 1, 3});
+  test_found(m, "beta", {2});
+  test_found(m, "epsilon", {3, 0});
+  test_found(m, "eta", {4});
+  test_found(m, "gamma", {5, 1});
+  test_found(cm, "alpha", {1, 1, 3});
+  test_found(cm, "beta", {2});
+  test_found(cm, "epsilon", {3, 0});
+  test_found(cm, "eta", {4});
+  test_found(cm, "gamma", {5, 1});
+
+  test_not_found(m, "charlie", 4);
+  test_not_found(m, "aaa", 0);
+  test_not_found(m, "zzz", 9);
+  test_not_found(cm, "charlie", 4);
+  test_not_found(cm, "aaa", 0);
+  test_not_found(cm, "zzz", 9);
+}
+
+// Instantiate with several container combinations, then verify equal_range()
+// actually dispatches through the transparent comparator.
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  { // the comparator's transparent path must be exercised by equal_range()
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {2, 2}, {3, 1}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto p = m.equal_range(Transparent<int>{3});
+    assert(p.first == m.begin() + 2); // both {3, ...} elements
+    assert(p.second == m.end());
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find.pass.cpp
new file mode 100644
index 00000000000000..74b7051eb0d7be
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find.pass.cpp
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+//       iterator find(const key_type& k);
+// const_iterator find(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// find(key) must return an iterator to the FIRST element equivalent to the
+// key (the input has runs of duplicates for 1, 2, 4 and 8), or end() when
+// absent; both const and non-const overloads are checked.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  // keys sort to 1,1,1,2,2,4,4,5,8,8
+  M m = {{1, 'a'}, {1, 'a'}, {1, 'b'}, {2, 'c'}, {2, 'b'}, {4, 'a'}, {4, 'd'}, {5, 'e'}, {8, 'a'}, {8, 'h'}};
+  ASSERT_SAME_TYPE(decltype(m.find(0)), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).find(0)), typename M::const_iterator);
+  assert(m.find(0) == m.end());
+  assert(m.find(1) == m.begin());
+  assert(m.find(2) == m.begin() + 3);
+  assert(m.find(3) == m.end());
+  assert(m.find(4) == m.begin() + 5);
+  assert(m.find(5) == m.begin() + 7);
+  assert(m.find(6) == m.end());
+  assert(m.find(7) == m.end());
+  assert(std::as_const(m).find(8) == m.begin() + 8);
+  assert(std::as_const(m).find(9) == m.end());
+}
+
+// Instantiate with several underlying container combinations.
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find_transparent.pass.cpp
new file mode 100644
index 00000000000000..7bbdcd38944706
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/find_transparent.pass.cpp
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> iterator       find(const K& x);
+// template<class K> const_iterator find(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M>
+concept CanFind         = requires(M m, Transparent<int> k) { m.find(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanFind<TransparentMap>);
+static_assert(CanFind<const TransparentMap>);
+static_assert(!CanFind<NonTransparentMap>);
+static_assert(!CanFind<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1},
+                    {"beta", 2},
+                    {"beta", 0},
+                    {"beta", 1},
+                    {"beta", 2},
+                    {"epsilon", 3},
+                    {"epsilon", 1},
+                    {"eta", 4},
+                    {"gamma", 6},
+                    {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.find(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).find(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_find = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.find(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset);
+  };
+
+  test_find(m, "alpha", 0);
+  test_find(m, "beta", 1);
+  test_find(m, "epsilon", 5);
+  test_find(m, "eta", 7);
+  test_find(m, "gamma", 8);
+  test_find(m, "charlie", 10);
+  test_find(m, "aaa", 10);
+  test_find(m, "zzz", 10);
+  test_find(cm, "alpha", 0);
+  test_find(cm, "beta", 1);
+  test_find(cm, "epsilon", 5);
+  test_find(cm, "eta", 7);
+  test_find(cm, "gamma", 8);
+  test_find(cm, "charlie", 10);
+  test_find(cm, "aaa", 10);
+  test_find(cm, "zzz", 10);
+}
+
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {2, 2}, {3, 3}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto it = m.find(Transparent<int>{3});
+    assert(it != m.end());
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound.pass.cpp
new file mode 100644
index 00000000000000..c3befdda7de6e3
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound.pass.cpp
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+//       iterator lower_bound(const key_type& k);
+// const_iterator lower_bound(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  {
+    using M = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {2, 'a'}, {2, 'c'}, {2, 'b'}, {4, 'd'}, {5, 'a'}, {5, 'e'}, {8, 'h'}, {8, 'a'}};
+    ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator);
+    assert(m.lower_bound(0) == m.begin());
+    assert(m.lower_bound(1) == m.begin());
+    assert(m.lower_bound(2) == m.begin() + 1);
+    assert(m.lower_bound(3) == m.begin() + 4);
+    assert(m.lower_bound(4) == m.begin() + 4);
+    assert(m.lower_bound(5) == m.begin() + 5);
+    assert(m.lower_bound(6) == m.begin() + 7);
+    assert(m.lower_bound(7) == m.begin() + 7);
+    assert(std::as_const(m).lower_bound(8) == m.begin() + 7);
+    assert(std::as_const(m).lower_bound(9) == m.end());
+  }
+  {
+    using M = std::flat_multimap<Key, Value, std::greater<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {1, 'b'}, {2, 'b'}, {4, 'd'}, {4, 'a'}, {4, 'e'}, {5, 'e'}, {8, 'a'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator);
+    assert(m.lower_bound(0) == m.end());
+    assert(m.lower_bound(1) == m.begin() + 7);
+    assert(m.lower_bound(2) == m.begin() + 6);
+    assert(m.lower_bound(3) == m.begin() + 6);
+    assert(m.lower_bound(4) == m.begin() + 3);
+    assert(m.lower_bound(5) == m.begin() + 2);
+    assert(m.lower_bound(6) == m.begin() + 2);
+    assert(m.lower_bound(7) == m.begin() + 2);
+    assert(std::as_const(m).lower_bound(8) == m.begin());
+    assert(std::as_const(m).lower_bound(9) == m.begin());
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound_transparent.pass.cpp
new file mode 100644
index 00000000000000..b757af132e677a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/lower_bound_transparent.pass.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> iterator       lower_bound(const K& x);
+// template<class K> const_iterator lower_bound(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M>
+concept CanLowerBound   = requires(M m, Transparent<int> k) { m.lower_bound(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanLowerBound<TransparentMap>);
+static_assert(CanLowerBound<const TransparentMap>);
+static_assert(!CanLowerBound<NonTransparentMap>);
+static_assert(!CanLowerBound<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1},
+                    {"alpha", 2},
+                    {"alpha", 3},
+                    {"beta", 2},
+                    {"epsilon", 3},
+                    {"epsilon", 4},
+                    {"eta", 4},
+                    {"gamma", 5},
+                    {"gamma", 5},
+                    {"gamma", 5},
+                    {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.lower_bound(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_lower_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.lower_bound(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset);
+  };
+
+  test_lower_bound(m, "abc", 0);
+  test_lower_bound(m, "alpha", 0);
+  test_lower_bound(m, "beta", 3);
+  test_lower_bound(m, "bets", 4);
+  test_lower_bound(m, "charlie", 4);
+  test_lower_bound(m, "echo", 4);
+  test_lower_bound(m, "epsilon", 4);
+  test_lower_bound(m, "eta", 6);
+  test_lower_bound(m, "gamma", 7);
+  test_lower_bound(m, "golf", 11);
+  test_lower_bound(m, "zzz", 11);
+
+  test_lower_bound(cm, "abc", 0);
+  test_lower_bound(cm, "alpha", 0);
+  test_lower_bound(cm, "beta", 3);
+  test_lower_bound(cm, "bets", 4);
+  test_lower_bound(cm, "charlie", 4);
+  test_lower_bound(cm, "echo", 4);
+  test_lower_bound(cm, "epsilon", 4);
+  test_lower_bound(cm, "eta", 6);
+  test_lower_bound(cm, "gamma", 7);
+  test_lower_bound(cm, "golf", 11);
+  test_lower_bound(cm, "zzz", 11);
+}
+
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto it = m.lower_bound(Transparent<int>{3});
+    assert(it != m.end());
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound.pass.cpp
new file mode 100644
index 00000000000000..d73d030236e227
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound.pass.cpp
@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+//       iterator upper_bound(const key_type& k);
+// const_iterator upper_bound(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  {
+    using M = std::flat_multimap<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    M m     = {
+        {1, 'a'}, {2, 'b'}, {4, 'd'}, {4, 'e'}, {4, 'a'}, {4, 'b'}, {5, 'e'}, {5, 'a'}, {8, 'a'}, {8, 'b'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator);
+    assert(m.upper_bound(0) == m.begin());
+    assert(m.upper_bound(1) == m.begin() + 1);
+    assert(m.upper_bound(2) == m.begin() + 2);
+    assert(m.upper_bound(3) == m.begin() + 2);
+    assert(m.upper_bound(4) == m.begin() + 6);
+    assert(m.upper_bound(5) == m.begin() + 8);
+    assert(m.upper_bound(6) == m.begin() + 8);
+    assert(std::as_const(m).upper_bound(7) == m.begin() + 8);
+    assert(std::as_const(m).upper_bound(8) == m.end());
+    assert(std::as_const(m).upper_bound(9) == m.end());
+  }
+
+  {
+    using M = std::flat_multimap<Key, Value, std::greater<Key>, KeyContainer, ValueContainer>;
+    M m     = {
+        {1, 'a'}, {2, 'b'}, {4, 'd'}, {4, 'e'}, {4, 'a'}, {4, 'b'}, {5, 'e'}, {5, 'a'}, {8, 'a'}, {8, 'b'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator);
+    assert(m.upper_bound(0) == m.end());
+    assert(m.upper_bound(1) == m.end());
+    assert(m.upper_bound(2) == m.begin() + 10);
+    assert(m.upper_bound(3) == m.begin() + 9);
+    assert(m.upper_bound(4) == m.begin() + 9);
+    assert(m.upper_bound(5) == m.begin() + 5);
+    assert(m.upper_bound(6) == m.begin() + 3);
+    assert(m.upper_bound(7) == m.begin() + 3);
+    assert(std::as_const(m).upper_bound(8) == m.begin() + 3);
+    assert(std::as_const(m).upper_bound(9) == m.begin());
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound_transparent.pass.cpp
new file mode 100644
index 00000000000000..969489d0fe6190
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/flat.multimap.operations/upper_bound_transparent.pass.cpp
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// template<class K> iterator       upper_bound(const K& x);
+// template<class K> const_iterator upper_bound(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M>
+concept CanUpperBound   = requires(M m, Transparent<int> k) { m.upper_bound(k); };
+using TransparentMap    = std::flat_multimap<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_multimap<int, double, NonTransparentComparator>;
+static_assert(CanUpperBound<TransparentMap>);
+static_assert(CanUpperBound<const TransparentMap>);
+static_assert(!CanUpperBound<NonTransparentMap>);
+static_assert(!CanUpperBound<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_multimap<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1},
+                    {"alpha", 2},
+                    {"alpha", 3},
+                    {"beta", 2},
+                    {"epsilon", 3},
+                    {"epsilon", 4},
+                    {"eta", 4},
+                    {"gamma", 5},
+                    {"gamma", 5},
+                    {"gamma", 5},
+                    {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.upper_bound(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_upper_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.upper_bound(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset);
+  };
+
+  test_upper_bound(m, "abc", 0);
+  test_upper_bound(m, "alpha", 3);
+  test_upper_bound(m, "beta", 4);
+  test_upper_bound(m, "bets", 4);
+  test_upper_bound(m, "charlie", 4);
+  test_upper_bound(m, "echo", 4);
+  test_upper_bound(m, "epsilon", 6);
+  test_upper_bound(m, "eta", 7);
+  test_upper_bound(m, "gamma", 11);
+  test_upper_bound(m, "golf", 11);
+  test_upper_bound(m, "zzz", 11);
+
+  test_upper_bound(cm, "abc", 0);
+  test_upper_bound(cm, "alpha", 3);
+  test_upper_bound(cm, "beta", 4);
+  test_upper_bound(cm, "bets", 4);
+  test_upper_bound(cm, "charlie", 4);
+  test_upper_bound(cm, "echo", 4);
+  test_upper_bound(cm, "epsilon", 6);
+  test_upper_bound(cm, "eta", 7);
+  test_upper_bound(cm, "gamma", 11);
+  test_upper_bound(cm, "golf", 11);
+  test_upper_bound(cm, "zzz", 11);
+}
+
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_multimap<int, int, TransparentComparator> m(std::sorted_equivalent, {{1, 1}, {2, 2}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto it = m.upper_bound(Transparent<int>{2});
+    assert(it == m.begin() + 3);
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/helpers.h b/libcxx/test/std/containers/container.adaptors/flat.multimap/helpers.h
new file mode 100644
index 00000000000000..252e2454d497ce
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/helpers.h
@@ -0,0 +1,389 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUPPORT_FLAT_MULTIMAP_HELPERS_H
+#define SUPPORT_FLAT_MULTIMAP_HELPERS_H
+
+#include <algorithm>
+#include <cassert>
+#include <string>
+#include <vector>
+#include <flat_map>
+
+#include "test_allocator.h"
+#include "test_macros.h"
+
+template <class... Args>
+void check_invariant(const std::flat_multimap<Args...>& m) {
+  assert(m.keys().size() == m.values().size());
+  const auto& keys = m.keys();
+  assert(std::is_sorted(keys.begin(), keys.end(), m.key_comp()));
+}
+
+struct StartsWith {
+  explicit StartsWith(char ch) : lower_(1, ch), upper_(1, ch + 1) {}
+  StartsWith(const StartsWith&)     = delete;
+  void operator=(const StartsWith&) = delete;
+  struct Less {
+    using is_transparent = void;
+    bool operator()(const std::string& a, const std::string& b) const { return a < b; }
+    bool operator()(const StartsWith& a, const std::string& b) const { return a.upper_ <= b; }
+    bool operator()(const std::string& a, const StartsWith& b) const { return a < b.lower_; }
+    bool operator()(const StartsWith&, const StartsWith&) const {
+      assert(false); // should not be called
+      return false;
+    }
+  };
+
+private:
+  std::string lower_;
+  std::string upper_;
+};
+
+template <class T>
+struct CopyOnlyVector : std::vector<T> {
+  using std::vector<T>::vector;
+
+  CopyOnlyVector(const CopyOnlyVector&) = default;
+  CopyOnlyVector(CopyOnlyVector&& other) : CopyOnlyVector(other) {}
+  CopyOnlyVector(CopyOnlyVector&& other, std::vector<T>::allocator_type alloc) : CopyOnlyVector(other, alloc) {}
+
+  CopyOnlyVector& operator=(const CopyOnlyVector&) = default;
+  CopyOnlyVector& operator=(CopyOnlyVector&& other) { return this->operator=(other); }
+};
+
+template <class T, bool ConvertibleToT = false>
+struct Transparent {
+  T t;
+
+  operator T() const
+    requires ConvertibleToT
+  {
+    return t;
+  }
+};
+
+template <class T>
+using ConvertibleTransparent = Transparent<T, true>;
+
+template <class T>
+using NonConvertibleTransparent = Transparent<T, false>;
+
+struct TransparentComparator {
+  using is_transparent = void;
+
+  bool* transparent_used  = nullptr;
+  TransparentComparator() = default;
+  TransparentComparator(bool& used) : transparent_used(&used) {}
+
+  template <class T, bool Convertible>
+  bool operator()(const T& t, const Transparent<T, Convertible>& transparent) const {
+    if (transparent_used != nullptr) {
+      *transparent_used = true;
+    }
+    return t < transparent.t;
+  }
+
+  template <class T, bool Convertible>
+  bool operator()(const Transparent<T, Convertible>& transparent, const T& t) const {
+    if (transparent_used != nullptr) {
+      *transparent_used = true;
+    }
+    return transparent.t < t;
+  }
+
+  template <class T>
+  bool operator()(const T& t1, const T& t2) const {
+    return t1 < t2;
+  }
+};
+
+struct NonTransparentComparator {
+  template <class T, bool Convertible>
+  bool operator()(const T&, const Transparent<T, Convertible>&) const;
+
+  template <class T, bool Convertible>
+  bool operator()(const Transparent<T, Convertible>&, const T&) const;
+
+  template <class T>
+  bool operator()(const T&, const T&) const;
+};
+
+struct NoDefaultCtr {
+  NoDefaultCtr() = delete;
+};
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+template <class T>
+struct EmplaceUnsafeContainer : std::vector<T> {
+  using std::vector<T>::vector;
+
+  template <class... Args>
+  auto emplace(Args&&... args) -> decltype(std::declval<std::vector<T>>().emplace(std::forward<Args>(args)...)) {
+    if (this->size() > 1) {
+      auto it1 = this->begin();
+      auto it2 = it1 + 1;
+      // messing up the container
+      std::iter_swap(it1, it2);
+    }
+
+    throw 42;
+  }
+
+  template <class... Args>
+  auto insert(Args&&... args) -> decltype(std::declval<std::vector<T>>().insert(std::forward<Args>(args)...)) {
+    if (this->size() > 1) {
+      auto it1 = this->begin();
+      auto it2 = it1 + 1;
+      // messing up the container
+      std::iter_swap(it1, it2);
+    }
+
+    throw 42;
+  }
+};
+
+template <class T>
+struct ThrowOnEraseContainer : std::vector<T> {
+  using std::vector<T>::vector;
+
+  template <class... Args>
+  auto erase(Args&&... args) -> decltype(std::declval<std::vector<T>>().erase(std::forward<Args>(args)...)) {
+    throw 42;
+  }
+};
+
+template <class T>
+struct ThrowOnMoveContainer : std::vector<T> {
+  using std::vector<T>::vector;
+
+  ThrowOnMoveContainer(ThrowOnMoveContainer&&) { throw 42; }
+
+  ThrowOnMoveContainer& operator=(ThrowOnMoveContainer&&) { throw 42; }
+};
+
+#endif
+
+template <class F>
+void test_emplace_exception_guarantee([[maybe_unused]] F&& emplace_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  using C = TransparentComparator;
+  {
+    // Throw on emplace the key, and underlying has strong exception guarantee
+    using KeyContainer = std::vector<int, test_allocator<int>>;
+    using M            = std::flat_multimap<int, int, C, KeyContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<KeyContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    test_allocator_statistics stats;
+
+    KeyContainer a({1, 1, 2, 4}, test_allocator<int>{&stats});
+    std::vector<int> b                    = {5, 6, 7, 8};
+    [[maybe_unused]] auto expected_keys   = a;
+    [[maybe_unused]] auto expected_values = b;
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 1, 1);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, the flat_multimap is unchanged
+      LIBCPP_ASSERT(m.size() == 4);
+      LIBCPP_ASSERT(m.keys() == expected_keys);
+      LIBCPP_ASSERT(m.values() == expected_values);
+    }
+  }
+  {
+    // Throw on emplace the key, and underlying has no strong exception guarantee
+    using KeyContainer = EmplaceUnsafeContainer<int>;
+    using M            = std::flat_multimap<int, int, C, KeyContainer>;
+
+    LIBCPP_STATIC_ASSERT(!std::__container_traits<KeyContainer>::__emplacement_has_strong_exception_safety_guarantee);
+    KeyContainer a     = {1, 2, 2, 4};
+    std::vector<int> b = {5, 6, 7, 8};
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+    try {
+      emplace_function(m, 1, 1);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, the flat_multimap is cleared
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // Throw on emplace the value, and underlying has strong exception guarantee
+    using ValueContainer = std::vector<int, test_allocator<int>>;
+
+    using M = std::flat_multimap<int, int, C, std::vector<int>, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    std::vector<int> a = {1, 3, 3, 4};
+    test_allocator_statistics stats;
+    ValueContainer b({1, 2, 3, 4}, test_allocator<int>{&stats});
+
+    [[maybe_unused]] auto expected_keys   = a;
+    [[maybe_unused]] auto expected_values = b;
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 3, 3);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, the emplaced key is erased and the flat_multimap is unchanged
+      LIBCPP_ASSERT(m.size() == 4);
+      LIBCPP_ASSERT(m.keys() == expected_keys);
+      LIBCPP_ASSERT(m.values() == expected_values);
+    }
+  }
+  {
+    // Throw on emplace the value, and underlying has no strong exception guarantee
+    using ValueContainer = EmplaceUnsafeContainer<int>;
+    using M              = std::flat_multimap<int, int, C, std::vector<int>, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(!std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+    std::vector<int> a = {1, 1, 1, 1};
+    ValueContainer b   = {1, 2, 3, 4};
+
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+
+    try {
+      emplace_function(m, 1, 5);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, the flat_multimap is cleared
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // Throw on emplace the value, then throw again on erasing the key
+    using KeyContainer   = ThrowOnEraseContainer<int>;
+    using ValueContainer = std::vector<int, test_allocator<int>>;
+    using M              = std::flat_multimap<int, int, C, KeyContainer, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    KeyContainer a = {4, 4, 4, 4};
+    test_allocator_statistics stats;
+    ValueContainer b({1, 2, 3, 4}, test_allocator<int>{&stats});
+
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, we try to erase the key after value emplacement failure.
+      // and after erasure failure, we clear the flat_multimap
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+#endif
+}
+
+template <class F>
+void test_insert_range_exception_guarantee([[maybe_unused]] F&& insert_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  using KeyContainer   = EmplaceUnsafeContainer<int>;
+  using ValueContainer = std::vector<int>;
+  using M              = std::flat_multimap<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+  test_allocator_statistics stats;
+  KeyContainer a{1, 2, 3, 4};
+  ValueContainer b{1, 2, 3, 4};
+  M m(std::sorted_equivalent, std::move(a), std::move(b));
+
+  std::vector<std::pair<int, int>> newValues = {{0, 0}, {1, 1}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+  stats.throw_after                          = 1;
+  try {
+    insert_function(m, newValues);
+    assert(false);
+  } catch (int) {
+    check_invariant(m);
+    // In libc++, we clear if anything goes wrong when inserting a range
+    LIBCPP_ASSERT(m.size() == 0);
+  }
+#endif
+}
+
+template <class F>
+void test_erase_exception_guarantee([[maybe_unused]] F&& erase_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  {
+    // key erase throws
+    using KeyContainer   = ThrowOnEraseContainer<int>;
+    using ValueContainer = std::vector<int>;
+    using M              = std::flat_multimap<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    KeyContainer a{1, 3, 3, 4};
+    ValueContainer b{1, 3, 3, 4};
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+    try {
+      erase_function(m, 3);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we clear if anything goes wrong when erasing
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // key erase throws
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnEraseContainer<int>;
+    using M              = std::flat_multimap<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    KeyContainer a{1, 3, 3, 4};
+    ValueContainer b{1, 3, 3, 4};
+    M m(std::sorted_equivalent, std::move(a), std::move(b));
+    try {
+      erase_function(m, 3);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we clear if anything goes wrong when erasing
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+#endif
+}
+class Moveable {
+  int int_;
+  double double_;
+
+public:
+  Moveable() : int_(0), double_(0) {}
+  Moveable(int i, double d) : int_(i), double_(d) {}
+  Moveable(Moveable&& x) : int_(x.int_), double_(x.double_) {
+    x.int_    = -1;
+    x.double_ = -1;
+  }
+  Moveable& operator=(Moveable&& x) {
+    int_      = x.int_;
+    x.int_    = -1;
+    double_   = x.double_;
+    x.double_ = -1;
+    return *this;
+  }
+
+  Moveable(const Moveable&)            = delete;
+  Moveable& operator=(const Moveable&) = delete;
+  bool operator==(const Moveable& x) const { return int_ == x.int_ && double_ == x.double_; }
+  bool operator<(const Moveable& x) const { return int_ < x.int_ || (int_ == x.int_ && double_ < x.double_); }
+
+  int get() const { return int_; }
+  bool moved() const { return int_ == -1; }
+};
+
+#endif // SUPPORT_FLAT_MULTIMAP_HELPERS_H
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/incomplete_type.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/incomplete_type.pass.cpp
new file mode 100644
index 00000000000000..e4325b1dfe3ba7
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/incomplete_type.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// Check that std::flat_multimap and its iterators can be instantiated with an incomplete
+// type.
+
+#include <flat_map>
+#include <vector>
+
+// A is still incomplete while flat_multimap<A, A> and its iterator types are
+// instantiated as members of A's own definition — exactly what this test checks.
+struct A {
+  using Map = std::flat_multimap<A, A>;
+  int data;
+  Map m;
+  Map::iterator it;
+  Map::const_iterator cit;
+};
+
+// Implement the operator< required in order to instantiate flat_multimap<A, X>.
+bool operator<(A const& L, A const& R) { return L.data < R.data; }
+
+int main(int, char**) {
+  A a; // Constructing A forces instantiation of flat_multimap<A, A> and its iterators.
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/op_compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/op_compare.pass.cpp
new file mode 100644
index 00000000000000..680ff1a127ddaf
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/op_compare.pass.cpp
@@ -0,0 +1,133 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_multimap
+
+// friend bool operator==(const flat_multimap& x, const flat_multimap& y);
+// friend synth-three-way-result<value_type>
+//   operator<=>(const flat_multimap& x, const flat_multimap& y);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <compare>
+#include <flat_map>
+#include <functional>
+#include <limits>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_comparisons.h"
+#include "test_container_comparisons.h"
+
+// Exercises operator== and operator<=> for flat_multimap over the given
+// key/value container combination, including duplicate keys and a custom
+// (greater) comparator.  The trailing comments show the logical contents
+// being compared, in iteration order.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+
+  {
+    using C = std::flat_multimap<Key, Value>;
+    C s1    = {{1, 1}};
+    C s2    = {{2, 0}}; // {{1,1}} versus {{2,0}}
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{1, 1}}; // {{1,1}} versus {{1,1}}
+    assert(testComparisons(s1, s2, true, false));
+    s2 = {{1, 1}, {2, 0}}; // {{1,1}} versus {{1,1},{2,0}}
+    assert(testComparisons(s1, s2, false, true));
+    s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{0,0},{1,1},{2,2}} versus {{1,1},{2,0}}
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{0,0},{1,1},{2,2}} versus {{0,0},{1,1},{2,3}}
+    assert(testComparisons(s1, s2, false, true));
+
+    // Duplicate keys: multimap comparison is element-wise, duplicates included.
+    s1 = {{1, 1}, {1, 1}};
+    s2 = {{1, 1}, {1, 1}};
+    assert(testComparisons(s1, s2, true, false));
+
+    s2 = {{1, 1}, {1, 1}, {2, 2}};
+    assert(testComparisons(s1, s2, false, true));
+
+    s2 = {{1, 1}, {2, 2}, {2, 2}};
+    assert(testComparisons(s1, s2, false, true));
+
+    s2 = {{0, 0}, {1, 1}, {1, 1}};
+    assert(testComparisons(s1, s2, false, false));
+  }
+  {
+    // Comparisons use value_type's native operators, not the comparator
+    using C = std::flat_multimap<Key, Value, std::greater<Key>>;
+    C s1    = {{1, 1}};
+    C s2    = {{2, 0}}; // {{1,1}} versus {{2,0}}
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{1, 1}}; // {{1,1}} versus {{1,1}}
+    assert(testComparisons(s1, s2, true, false));
+    s2 = {{1, 1}, {2, 0}}; // {{1,1}} versus {{2,0},{1,1}}
+    assert(testComparisons(s1, s2, false, true));
+    s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{2,2},{1,1},{0,0}} versus {{2,0},{1,1}}
+    assert(testComparisons(s1, s2, false, false));
+    s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{2,2},{1,1},{0,0}} versus {{2,3},{1,1},{0,0}}
+    assert(testComparisons(s1, s2, false, true));
+  }
+}
+
+int main(int, char**) {
+  // Drive the comparison tests over a spread of key/mapped container types.
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::deque<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+  // This line previously duplicated the min_allocator instantiation above; use
+  // test_allocator instead so the included "test_allocator.h" is exercised.
+  test<std::vector<int, test_allocator<int>>, std::vector<int, test_allocator<int>>>();
+
+  {
+    // NaN in the key: <=> yields partial_ordering and compares unordered.
+    using C = std::flat_multimap<double, int>;
+    C s1    = {{1, 1}};
+    C s2    = C(std::sorted_equivalent, {{std::numeric_limits<double>::quiet_NaN(), 2}});
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+  }
+  {
+    // NaN in the mapped value: keys order first; when keys tie the NaN makes
+    // the comparison unordered.
+    using C = std::flat_multimap<int, double>;
+    C s1    = {{1, 1}};
+    C s2    = C(std::sorted_equivalent, {{2, std::numeric_limits<double>::quiet_NaN()}});
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, true, false));
+    s2 = C(std::sorted_equivalent, {{1, std::numeric_limits<double>::quiet_NaN()}});
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+  }
+  {
+    // Comparisons use value_type's native operators, not the comparator
+    struct StrongComp {
+      bool operator()(double a, double b) const { return std::strong_order(a, b) < 0; }
+    };
+    using C = std::flat_multimap<double, double, StrongComp>;
+    C s1    = {{1, 1}};
+    C s2    = {{std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()}};
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+    s1 = {{{1, 1}, {std::numeric_limits<double>::quiet_NaN(), 1}}};
+    s2 = {{{std::numeric_limits<double>::quiet_NaN(), 1}, {1, 1}}};
+    assert(std::lexicographical_compare_three_way(
+               s1.keys().begin(), s1.keys().end(), s2.keys().begin(), s2.keys().end(), std::strong_order) ==
+           std::strong_ordering::equal);
+    assert(s1 != s2);
+    assert((s1 <=> s2) == std::partial_ordering::unordered);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.multimap/types.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.multimap/types.compile.pass.cpp
new file mode 100644
index 00000000000000..490d51c2997931
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.multimap/types.compile.pass.cpp
@@ -0,0 +1,133 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+//  using key_type               = Key;
+//  using mapped_type            = T;
+//  using value_type             = pair<key_type, mapped_type>;
+//  using key_compare            = Compare;
+//  using reference              = pair<const key_type&, mapped_type&>;
+//  using const_reference        = pair<const key_type&, const mapped_type&>;
+//  using size_type              = size_t;
+//  using difference_type        = ptrdiff_t;
+//  using iterator               = implementation-defined; // see [container.requirements]
+//  using const_iterator         = implementation-defined; // see [container.requirements]
+//  using reverse_iterator       = std::reverse_iterator<iterator>;
+//  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+//  using key_container_type     = KeyContainer;
+//  using mapped_container_type  = MappedContainer;
+
+//  class value_compare;
+
+//  struct containers {
+//    key_container_type keys;
+//    mapped_container_type values;
+//  };
+
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <string>
+#include <vector>
+#include "min_allocator.h"
+
+// Compile-time checks of flat_multimap's nested member types against the
+// synopsis quoted above, covering default and non-default container and
+// comparator combinations.
+void test() {
+  {
+    // Default containers (std::vector) and default comparator (std::less).
+    using M = std::flat_multimap<int, double>;
+    static_assert(std::is_same_v<typename M::key_type, int>);
+    static_assert(std::is_same_v<typename M::mapped_type, double>);
+    static_assert(std::is_same_v<typename M::value_type, std::pair<int, double>>);
+    static_assert(std::is_same_v<typename M::key_compare, std::less<int>>);
+    static_assert(std::is_same_v<typename M::reference, std::pair<const int&, double&>>);
+    static_assert(std::is_same_v<typename M::const_reference, std::pair<const int&, const double&>>);
+    static_assert(std::is_same_v<typename M::size_type, size_t>);
+    static_assert(std::is_same_v<typename M::difference_type, ptrdiff_t>);
+    static_assert(requires { typename M::iterator; });
+    static_assert(requires { typename M::const_iterator; });
+    static_assert(std::is_same_v<typename M::reverse_iterator, std::reverse_iterator<typename M::iterator>>);
+    static_assert(
+        std::is_same_v<typename M::const_reverse_iterator, std::reverse_iterator<typename M::const_iterator>>);
+    static_assert(std::is_same_v<typename M::key_container_type, std::vector<int>>);
+    static_assert(std::is_same_v<typename M::mapped_container_type, std::vector<double>>);
+    static_assert(requires { typename M::value_compare; });
+    static_assert(requires { typename M::containers; });
+    static_assert(std::is_same_v<decltype(M::containers::keys), std::vector<int>>);
+    static_assert(std::is_same_v<decltype(M::containers::values), std::vector<double>>);
+  }
+
+  {
+    // User-provided comparator with deque-backed key and mapped containers.
+    struct A {};
+    struct Compare {
+      bool operator()(const std::string&, const std::string&) const;
+    };
+    using M = std::flat_multimap<std::string, A, Compare, std::deque<std::string>, std::deque<A>>;
+    static_assert(std::is_same_v<typename M::key_type, std::string>);
+    static_assert(std::is_same_v<typename M::mapped_type, A>);
+    static_assert(std::is_same_v<typename M::value_type, std::pair<std::string, A>>);
+    static_assert(std::is_same_v<typename M::key_compare, Compare>);
+    static_assert(std::is_same_v<typename M::reference, std::pair<const std::string&, A&>>);
+    static_assert(std::is_same_v<typename M::const_reference, std::pair<const std::string&, const A&>>);
+    static_assert(std::is_same_v<typename M::size_type, size_t>);
+    static_assert(std::is_same_v<typename M::difference_type, ptrdiff_t>);
+    static_assert(requires { typename M::iterator; });
+    static_assert(requires { typename M::const_iterator; });
+    static_assert(std::is_same_v<typename M::reverse_iterator, std::reverse_iterator<typename M::iterator>>);
+    static_assert(
+        std::is_same_v<typename M::const_reverse_iterator, std::reverse_iterator<typename M::const_iterator>>);
+    static_assert(std::is_same_v<typename M::key_container_type, std::deque<std::string>>);
+    static_assert(std::is_same_v<typename M::mapped_container_type, std::deque<A>>);
+    static_assert(requires { typename M::value_compare; });
+    static_assert(requires { typename M::containers; });
+    static_assert(std::is_same_v<decltype(M::containers::keys), std::deque<std::string>>);
+    static_assert(std::is_same_v<decltype(M::containers::values), std::deque<A>>);
+  }
+  {
+    // Iterator categories: the vector-backed default yields random-access
+    // iterators; value_compare is distinct from key_compare.
+    using C = std::flat_multimap<int, short>;
+    static_assert(std::is_same_v<C::key_type, int>);
+    static_assert(std::is_same_v<C::mapped_type, short>);
+    static_assert(std::is_same_v<C::value_type, std::pair<int, short>>);
+    static_assert(std::is_same_v<C::key_compare, std::less<int>>);
+    static_assert(!std::is_same_v<C::value_compare, std::less<int>>);
+    static_assert(std::is_same_v<C::reference, std::pair<const int&, short&>>);
+    static_assert(std::is_same_v<C::const_reference, std::pair<const int&, const short&>>);
+    static_assert(std::random_access_iterator<C::iterator>);
+    static_assert(std::random_access_iterator<C::const_iterator>);
+    static_assert(std::random_access_iterator<C::reverse_iterator>);
+    static_assert(std::random_access_iterator<C::const_reverse_iterator>);
+    static_assert(std::is_same_v<C::reverse_iterator, std::reverse_iterator<C::iterator>>);
+    static_assert(std::is_same_v<C::const_reverse_iterator, std::reverse_iterator<C::const_iterator>>);
+    static_assert(std::is_same_v<C::size_type, std::size_t>);
+    static_assert(std::is_same_v<C::difference_type, std::ptrdiff_t>);
+    static_assert(std::is_same_v<C::key_container_type, std::vector<int>>);
+    static_assert(std::is_same_v<C::mapped_container_type, std::vector<short>>);
+  }
+  {
+    // Heterogeneous comparator type and a custom-allocator key container; the
+    // mapped container still defaults to std::vector<int>.
+    using C = std::flat_multimap<short, int, std::greater<long>, std::deque<short, min_allocator<short>>>;
+    static_assert(std::is_same_v<C::key_type, short>);
+    static_assert(std::is_same_v<C::mapped_type, int>);
+    static_assert(std::is_same_v<C::value_type, std::pair<short, int>>);
+    static_assert(std::is_same_v<C::key_compare, std::greater<long>>);
+    static_assert(!std::is_same_v<C::value_compare, std::greater<long>>);
+    static_assert(std::is_same_v<C::reference, std::pair<const short&, int&>>);
+    static_assert(std::is_same_v<C::const_reference, std::pair<const short&, const int&>>);
+    static_assert(std::random_access_iterator<C::iterator>);
+    static_assert(std::random_access_iterator<C::const_iterator>);
+    static_assert(std::random_access_iterator<C::reverse_iterator>);
+    static_assert(std::random_access_iterator<C::const_reverse_iterator>);
+    static_assert(std::is_same_v<C::reverse_iterator, std::reverse_iterator<C::iterator>>);
+    static_assert(std::is_same_v<C::const_reverse_iterator, std::reverse_iterator<C::const_iterator>>);
+    // size_type is invariably size_t
+    static_assert(std::is_same_v<C::size_type, std::size_t>);
+    static_assert(std::is_same_v<C::difference_type, std::ptrdiff_t>);
+    static_assert(std::is_same_v<C::key_container_type, std::deque<short, min_allocator<short>>>);
+    static_assert(std::is_same_v<C::mapped_container_type, std::vector<int>>);
+  }
+}



More information about the cfe-commits mailing list