[llvm-branch-commits] [clang] [clang-tools-extra] [compiler-rt] [flang] [libc] [libcxx] [lldb] [llvm] [mlir] [polly] [LoopInterchange] Remove some early exits in transform phase (NFCI) (PR #205563)

Wed Jun 24 10:05:43 PDT 2026

Balázs Benics <benicsbalazs at gmail.com>,forking-google-bazel-bot[bot],
 Nikolas Klauser <nikolasklauser at berlin.de>,Simon Pilgrim
 <llvm-dev at redking.me.uk>,David CARLIER <devnexen at gmail.com>,Sairudra More
 <sairudra60 at gmail.com>,Donát Nagy <donat.nagy at ericsson.com>,Arda
 Serdar Pektezol <arda at pektezol.dev>,Fady Farag <com.webkit.iidmsa at gmail.com>,Nikolas
 Klauser <nikolasklauser at berlin.de>,"Ivan R. Ivanov" <iivanov at nvidia.com>,Nikolas
 Klauser <nikolasklauser at berlin.de>,
 Donát Nagy <donat.nagy at ericsson.com>,Nikolas Klauser
 <nikolasklauser at berlin.de>,
 Andrzej Warzyński <andrzej.warzynski at arm.com>,Tony Guillot
 <tony.guillot at protonmail.com>,Simon Pilgrim <llvm-dev at redking.me.uk>,Krzysztof
 Parzyszek <Krzysztof.Parzyszek at amd.com>,Nathan Corbyn <n_corbyn at apple.com>,Lukas
 Sommer <lukas.sommer at amd.com>,Simon Pilgrim <llvm-dev at redking.me.uk>,Ryotaro
 Kasuga <kasuga.ryotaro at fujitsu.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/205563 at github.com>


https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/205563

>From 448c3d54df7bcd5e5be2b5d051832ad00b4cc89c Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Jun 2026 04:20:50 -0400
Subject: [PATCH 01/42] [AArch64] Run cleanup one final time after peephole
 (#199711)

Dead machine instruction elimination is a lightweight pass. It should
always be the last SSA pass, since the peephole pass can end up making
some instructions dead.
---
 .../Target/AArch64/AArch64TargetMachine.cpp   |  4 +-
 llvm/test/CodeGen/AArch64/O3-pipeline.ll      |  1 +
 .../aarch64-neon-vector-insert-uaddlv.ll      | 44 +++++++++----------
 llvm/test/CodeGen/AArch64/fabs-fp128.ll       |  5 +--
 4 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 568563cf53220..c20fb31ab8854 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -815,8 +815,10 @@ void AArch64PassConfig::addMachineSSAOptimization() {
   // Run default MachineSSAOptimization first.
   TargetPassConfig::addMachineSSAOptimization();
 
-  if (TM->getOptLevel() != CodeGenOptLevel::None)
+  if (TM->getOptLevel() != CodeGenOptLevel::None) {
     addPass(createAArch64MIPeepholeOptLegacyPass());
+    addPass(&DeadMachineInstructionElimID);
+  }
 }
 
 bool AArch64PassConfig::addILPOpts() {
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 08d3b94530d14..ed2453941866a 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -172,6 +172,7 @@
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions
 ; CHECK-NEXT:       AArch64 MI Peephole Optimization pass
+; CHECK-NEXT:       Remove dead machine instructions
 ; CHECK-NEXT:       AArch64 Dead register definitions
 ; CHECK-NEXT:       Detect Dead Lanes
 ; CHECK-NEXT:       Init Undef Pass
diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 91eda8d552397..72270e3be443f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    add x8, x0, #8
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    st1.s { v1 }[2], [x8]
-; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    st1.s { v0 }[2], [x8]
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -283,9 +283,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    mov.h v2[0], v1[0]
 ; CHECK-NEXT:    bic.4h v2, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v2, v2, #0
-; CHECK-NEXT:    ucvtf.4s v2, v2
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v2, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -389,9 +389,9 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) {
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -408,13 +408,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
-; CHECK-NEXT:    stp q2, q2, [x0, #32]
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
+; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q2, [x0]
+; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -435,9 +435,9 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
 ; CHECK-NEXT:    uaddlv.4s d0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
 ; CHECK-NEXT:    bic.4h v1, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v1, v1, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ushll.4s v0, v1, #0
+; CHECK-NEXT:    ucvtf.4s v0, v0
+; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -454,14 +454,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
-; CHECK-NEXT:    movi.2d v2, #0000000000000000
 ; CHECK-NEXT:    uaddlv.4s d0, v0
-; CHECK-NEXT:    stp q2, q2, [x0, #32]
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
+; CHECK-NEXT:    movi.2d v0, #0000000000000000
 ; CHECK-NEXT:    bic.4h v1, #255, lsl #8
+; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    ushll.4s v1, v1, #0
 ; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q2, [x0]
+; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 903aa8adf7085..17b75f89b32da 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-LABEL: fabs_v4f128:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov v7.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
 ; CHECK-GI-NEXT:    mov x9, v1.d[1]
 ; CHECK-GI-NEXT:    mov x10, v2.d[1]
 ; CHECK-GI-NEXT:    mov x11, v3.d[1]
@@ -152,14 +152,13 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
-; CHECK-GI-NEXT:    mov v7.d[1], x8
+; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    and x8, x9, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x9, x10, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x10, x11, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    mov v1.d[1], x8
 ; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    mov v3.d[1], x10
-; CHECK-GI-NEXT:    mov v0.16b, v7.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)

>From da9252a9453603af579a9f56c3212cce04e831d2 Mon Sep 17 00:00:00 2001
From: Haohai Wen <haohai.wen at intel.com>
Date: Wed, 24 Jun 2026 16:23:52 +0800
Subject: [PATCH 02/42] [ObjectYAML][NFC] Derive BBAddrMap section size from
 the CBA offset (#204056)

Add the CBA offset delta to sh_size once at the end instead of after
each write.
---
 llvm/lib/ObjectYAML/ELFEmitter.cpp | 41 ++++++++++++++----------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 4d423e71e959e..6dc162356ab40 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1462,6 +1462,7 @@ void ELFState<ELFT>::writeSectionContent(
       PGOAnalyses = &Section.PGOAnalyses.value();
   }
 
+  uint64_t CurrentOffset = CBA.getOffset();
   for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
     // Write version and feature values.
     if (E.Version > 5)
@@ -1469,14 +1470,10 @@ void ELFState<ELFT>::writeSectionContent(
                            << static_cast<int>(E.Version)
                            << "; encoding using the most recent version";
     CBA.write(E.Version);
-    SHeader.sh_size += 1;
-    if (E.Version < 5) {
+    if (E.Version < 5)
       CBA.write(static_cast<uint8_t>(E.Feature));
-      SHeader.sh_size += 1;
-    } else {
+    else
       CBA.write<uint16_t>(E.Feature, ELFT::Endianness);
-      SHeader.sh_size += 2;
-    }
     auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature);
     if (!FeatureOrErr) {
       // Invalid feature: warn and skip the entry.
@@ -1496,7 +1493,7 @@ void ELFState<ELFT>::writeSectionContent(
       // 'NumBBRanges' field when specified.
       uint64_t NumBBRanges =
           E.NumBBRanges.value_or(E.BBRanges ? E.BBRanges->size() : 0);
-      SHeader.sh_size += CBA.writeULEB128(NumBBRanges);
+      CBA.writeULEB128(NumBBRanges);
     }
     if (!E.BBRanges)
       continue;
@@ -1511,31 +1508,30 @@ void ELFState<ELFT>::writeSectionContent(
       // specified.
       uint64_t NumBlocks =
           BBR.NumBlocks.value_or(BBR.BBEntries ? BBR.BBEntries->size() : 0);
-      SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks);
+      CBA.writeULEB128(NumBlocks);
       // Write all BBEntries in this BBRange.
       if (!BBR.BBEntries || FeatureOrErr->OmitBBEntries)
         continue;
       for (const BBAddrMapYAML::BBAddrMapEntry::BBEntry &BBE : *BBR.BBEntries) {
         ++TotalNumBlocks;
         if (E.Version > 1)
-          SHeader.sh_size += CBA.writeULEB128(BBE.ID);
-        SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset);
+          CBA.writeULEB128(BBE.ID);
+        CBA.writeULEB128(BBE.AddressOffset);
         if (EmitCallsiteEndOffsets) {
           size_t NumCallsiteEndOffsets =
               BBE.CallsiteEndOffsets ? BBE.CallsiteEndOffsets->size() : 0;
-          SHeader.sh_size += CBA.writeULEB128(NumCallsiteEndOffsets);
+          CBA.writeULEB128(NumCallsiteEndOffsets);
           if (BBE.CallsiteEndOffsets) {
             for (uint32_t Offset : *BBE.CallsiteEndOffsets)
-              SHeader.sh_size += CBA.writeULEB128(Offset);
+              CBA.writeULEB128(Offset);
           }
         }
-        SHeader.sh_size += CBA.writeULEB128(BBE.Size);
-        SHeader.sh_size += CBA.writeULEB128(BBE.Metadata);
+        CBA.writeULEB128(BBE.Size);
+        CBA.writeULEB128(BBE.Metadata);
         if (FeatureOrErr->BBHash || BBE.Hash.has_value()) {
           uint64_t Hash =
               BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0);
           CBA.write<uint64_t>(Hash, ELFT::Endianness);
-          SHeader.sh_size += 8;
         }
       }
     }
@@ -1544,7 +1540,7 @@ void ELFState<ELFT>::writeSectionContent(
     const BBAddrMapYAML::PGOAnalysisMapEntry &PGOEntry = PGOAnalyses->at(Idx);
 
     if (PGOEntry.FuncEntryCount)
-      SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount);
+      CBA.writeULEB128(*PGOEntry.FuncEntryCount);
 
     if (!PGOEntry.PGOBBEntries)
       continue;
@@ -1560,20 +1556,21 @@ void ELFState<ELFT>::writeSectionContent(
 
     for (const auto &PGOBBE : PGOBBEntries) {
       if (PGOBBE.BBFreq)
-        SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq);
+        CBA.writeULEB128(*PGOBBE.BBFreq);
       if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value())
-        SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0));
+        CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0));
       if (PGOBBE.Successors) {
-        SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size());
+        CBA.writeULEB128(PGOBBE.Successors->size());
         for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) {
-          SHeader.sh_size += CBA.writeULEB128(ID);
-          SHeader.sh_size += CBA.writeULEB128(BrProb);
+          CBA.writeULEB128(ID);
+          CBA.writeULEB128(BrProb);
           if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value())
-            SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0));
+            CBA.writeULEB128(PostLinkBrFreq.value_or(0));
         }
       }
     }
   }
+  SHeader.sh_size += CBA.getOffset() - CurrentOffset;
 }
 
 template <class ELFT>

>From 25ae6ce4801f6f6addae5079323870d4191e7531 Mon Sep 17 00:00:00 2001
From: Victor Campos <victor.campos at arm.com>
Date: Wed, 24 Jun 2026 09:33:58 +0100
Subject: [PATCH 03/42] [libc] Refactor qsort code (#198781)

This patch makes the following changes:
- Refactor the internal sorting functions to reduce code duplication.
- Move the testing machinery for `qsort_r` to a shared place.

These changes are made in anticipation of the introduction of Annex K's
`qsort_s`. This function shares most of its semantics with `qsort_r`,
therefore most of the testing logic can be shared between the two.
Besides, `qsort`, `qsort_r` and `qsort_s` are all very similar, hence we
can attempt to reduce duplication a bit more.
---
 libc/src/stdlib/qsort.cpp                 |   6 +-
 libc/src/stdlib/qsort_r.cpp               |   7 +-
 libc/src/stdlib/qsort_util.h              |  25 +++-
 libc/test/src/stdlib/CMakeLists.txt       |   3 +-
 libc/test/src/stdlib/QsortReentrantTest.h | 156 ++++++++++++++++++++++
 libc/test/src/stdlib/qsort_r_test.cpp     | 136 +------------------
 6 files changed, 184 insertions(+), 149 deletions(-)
 create mode 100644 libc/test/src/stdlib/QsortReentrantTest.h

diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp
index f66b686d4e54b..46a74fb9118a3 100644
--- a/libc/src/stdlib/qsort.cpp
+++ b/libc/src/stdlib/qsort.cpp
@@ -18,11 +18,7 @@ LLVM_LIBC_FUNCTION(void, qsort,
                    (void *array, size_t array_size, size_t elem_size,
                     int (*compare)(const void *, const void *))) {
 
-  const auto is_less = [compare](const void *a, const void *b) -> bool {
-    return compare(a, b) < 0;
-  };
-
-  internal::unstable_sort(array, array_size, elem_size, is_less);
+  internal::unstable_sort(array, array_size, elem_size, compare);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp
index 47448201eddbd..65afcee77885d 100644
--- a/libc/src/stdlib/qsort_r.cpp
+++ b/libc/src/stdlib/qsort_r.cpp
@@ -18,12 +18,7 @@ LLVM_LIBC_FUNCTION(void, qsort_r,
                    (void *array, size_t array_size, size_t elem_size,
                     int (*compare)(const void *, const void *, void *),
                     void *arg)) {
-
-  const auto is_less = [compare, arg](const void *a, const void *b) -> bool {
-    return compare(a, b, arg) < 0;
-  };
-
-  internal::unstable_sort(array, array_size, elem_size, is_less);
+  internal::unstable_sort(array, array_size, elem_size, compare, arg);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h
index 7882b829d3274..2f0438d624d14 100644
--- a/libc/src/stdlib/qsort_util.h
+++ b/libc/src/stdlib/qsort_util.h
@@ -64,10 +64,29 @@ LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len,
 }
 
 template <typename F>
+LIBC_INLINE void unstable_sort_dispatch(void *array, size_t array_len,
+                                        size_t elem_size, F is_less) {
+  constexpr bool USE_QUICK_SORT = (LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT);
+  unstable_sort_impl<USE_QUICK_SORT>(array, array_len, elem_size, is_less);
+}
+
+template <typename CmpFn>
+LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size,
+                               CmpFn compare) {
+  const auto is_less = [compare](const void *a, const void *b) -> bool {
+    return compare(a, b) < 0;
+  };
+  unstable_sort_dispatch(array, array_len, elem_size, is_less);
+}
+
+template <typename CmpFn>
 LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size,
-                               const F &is_less) {
-#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT))
-  unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less);
+                               CmpFn compare, void *context) {
+  const auto is_less = [compare, context](const void *a,
+                                          const void *b) -> bool {
+    return compare(a, b, context) < 0;
+  };
+  unstable_sort_dispatch(array, array_len, elem_size, is_less);
 }
 
 } // namespace internal
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 95e5a3ccc2bb5..2a7cd51d21e12 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -360,8 +360,9 @@ add_libc_test(
     libc-stdlib-tests
   SRCS
     qsort_r_test.cpp
+  HDRS
+    QsortReentrantTest.h
   DEPENDS
-    libc.hdr.types.size_t
     libc.src.stdlib.qsort_r
 )
 
diff --git a/libc/test/src/stdlib/QsortReentrantTest.h b/libc/test/src/stdlib/QsortReentrantTest.h
new file mode 100644
index 0000000000000..85d549098c740
--- /dev/null
+++ b/libc/test/src/stdlib/QsortReentrantTest.h
@@ -0,0 +1,156 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a template class for testing reentrant qsort functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "test/UnitTest/Test.h"
+
+/// Provides shared tests for reentrant qsort variants.
+///
+/// The fixture verifies that a qsort-like implementation correctly handles
+/// sorted data, reverse-sorted data, and comparators that dispatch through a
+/// type-erased context pointer.
+template <typename QsortFnTy, typename SizeTy>
+class QsortReentrantTest : public LIBC_NAMESPACE::testing::Test {
+private:
+  static int int_compare_count(const void *l, const void *r, void *count_arg) {
+    int li = *static_cast<const int *>(l);
+    int ri = *static_cast<const int *>(r);
+    SizeTy *count = static_cast<SizeTy *>(count_arg);
+    *count = *count + 1;
+    if (li == ri)
+      return 0;
+    if (li > ri)
+      return 1;
+    return -1;
+  }
+
+  struct PriorityVal {
+    int priority;
+    int size;
+  };
+
+  static int compare_priority_val(const PriorityVal *l, const PriorityVal *r) {
+    // Subtracting the priorities is unsafe, but it's fine for this test.
+    int priority_diff = l->priority - r->priority;
+    if (priority_diff != 0) {
+      return priority_diff;
+    }
+    if (l->size == r->size)
+      return 0;
+    if (l->size > r->size)
+      return 1;
+    return -1;
+  }
+
+  // The following test is intended to mimic the CPP library pattern of having a
+  // comparison function that takes a specific type, which is passed to a
+  // library that then needs to sort an array of that type. The library can't
+  // safely pass the comparison function to qsort because a function that takes
+  // const T* being cast to a function that takes const void* is undefined
+  // behavior. The safer pattern is to pass a type erased comparator that calls
+  // into the typed comparator to qsort_r.
+  template <typename T>
+  static int type_erased_comp(const void *l, const void *r,
+                              void *erased_func_ptr) {
+    using TypedComp = int (*)(const T *, const T *);
+    TypedComp typed_func_ptr = reinterpret_cast<TypedComp>(erased_func_ptr);
+    const T *lt = static_cast<const T *>(l);
+    const T *rt = static_cast<const T *>(r);
+    return typed_func_ptr(lt, rt);
+  }
+
+public:
+  void sorted_array(QsortFnTy func) {
+    int array[25] = {10,   23,   33,   35,   55,   70,    71,   100,  110,
+                     123,  133,  135,  155,  170,  171,   1100, 1110, 1123,
+                     1133, 1135, 1155, 1170, 1171, 11100, 12310};
+    constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(int);
+
+    SizeTy count = 0;
+
+    func(array, ARRAY_SIZE, sizeof(int), int_compare_count, &count);
+
+    ASSERT_LE(array[0], 10);
+    ASSERT_LE(array[1], 23);
+    ASSERT_LE(array[2], 33);
+    ASSERT_LE(array[3], 35);
+    ASSERT_LE(array[4], 55);
+    ASSERT_LE(array[5], 70);
+    ASSERT_LE(array[6], 71);
+    ASSERT_LE(array[7], 100);
+    ASSERT_LE(array[8], 110);
+    ASSERT_LE(array[9], 123);
+    ASSERT_LE(array[10], 133);
+    ASSERT_LE(array[11], 135);
+    ASSERT_LE(array[12], 155);
+    ASSERT_LE(array[13], 170);
+    ASSERT_LE(array[14], 171);
+    ASSERT_LE(array[15], 1100);
+    ASSERT_LE(array[16], 1110);
+    ASSERT_LE(array[17], 1123);
+    ASSERT_LE(array[18], 1133);
+    ASSERT_LE(array[19], 1135);
+    ASSERT_LE(array[20], 1155);
+    ASSERT_LE(array[21], 1170);
+    ASSERT_LE(array[22], 1171);
+    ASSERT_LE(array[23], 11100);
+    ASSERT_LE(array[24], 12310);
+
+    // This is a sorted list, but there still have to have been at least N - 1
+    // comparisons made.
+    ASSERT_GE(count, ARRAY_SIZE - 1);
+  }
+
+  void reverse_sorted_array(QsortFnTy func) {
+    int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,
+                     12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1};
+    constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(int);
+
+    SizeTy count = 0;
+
+    func(array, ARRAY_SIZE, sizeof(int), int_compare_count, &count);
+
+    for (int i = 0; i < int(ARRAY_SIZE - 1); ++i)
+      ASSERT_LE(array[i], i + 1);
+
+    ASSERT_GE(count, ARRAY_SIZE);
+  }
+
+  void safe_type_erasure(QsortFnTy func) {
+    PriorityVal array[5] = {
+        {10, 3}, {1, 10}, {-1, 100}, {10, 0}, {3, 3},
+    };
+    constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(PriorityVal);
+
+    func(array, ARRAY_SIZE, sizeof(PriorityVal), type_erased_comp<PriorityVal>,
+         reinterpret_cast<void *>(compare_priority_val));
+
+    EXPECT_EQ(array[0].priority, -1);
+    EXPECT_EQ(array[0].size, 100);
+    EXPECT_EQ(array[1].priority, 1);
+    EXPECT_EQ(array[1].size, 10);
+    EXPECT_EQ(array[2].priority, 3);
+    EXPECT_EQ(array[2].size, 3);
+    EXPECT_EQ(array[3].priority, 10);
+    EXPECT_EQ(array[3].size, 0);
+    EXPECT_EQ(array[4].priority, 10);
+    EXPECT_EQ(array[4].size, 3);
+  }
+};
+
+#define QSORTREENTRANT_TEST(name, func, sizetype)                              \
+  using LlvmLibc##name##Test = QsortReentrantTest<decltype(func), sizetype>;   \
+  TEST_F(LlvmLibc##name##Test, SortedArray) { sorted_array(func); }            \
+  TEST_F(LlvmLibc##name##Test, ReverseSortedArray) {                           \
+    reverse_sorted_array(func);                                                \
+  }                                                                            \
+  TEST_F(LlvmLibc##name##Test, SafeTypeErasure) { safe_type_erasure(func); }
diff --git a/libc/test/src/stdlib/qsort_r_test.cpp b/libc/test/src/stdlib/qsort_r_test.cpp
index f18923618ed5e..b1fff5a1bc3dd 100644
--- a/libc/test/src/stdlib/qsort_r_test.cpp
+++ b/libc/test/src/stdlib/qsort_r_test.cpp
@@ -6,139 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "QsortReentrantTest.h"
 #include "src/stdlib/qsort_r.h"
 
-#include "test/UnitTest/Test.h"
-
-#include "hdr/types/size_t.h"
-
-static int int_compare_count(const void *l, const void *r, void *count_arg) {
-  int li = *reinterpret_cast<const int *>(l);
-  int ri = *reinterpret_cast<const int *>(r);
-  size_t *count = reinterpret_cast<size_t *>(count_arg);
-  *count = *count + 1;
-  if (li == ri)
-    return 0;
-  else if (li > ri)
-    return 1;
-  else
-    return -1;
-}
-
-TEST(LlvmLibcQsortRTest, SortedArray) {
-  int array[25] = {10,   23,   33,   35,   55,   70,    71,   100,  110,
-                   123,  133,  135,  155,  170,  171,   1100, 1110, 1123,
-                   1133, 1135, 1155, 1170, 1171, 11100, 12310};
-  constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-  size_t count = 0;
-
-  LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(int), int_compare_count,
-                          &count);
-
-  ASSERT_LE(array[0], 10);
-  ASSERT_LE(array[1], 23);
-  ASSERT_LE(array[2], 33);
-  ASSERT_LE(array[3], 35);
-  ASSERT_LE(array[4], 55);
-  ASSERT_LE(array[5], 70);
-  ASSERT_LE(array[6], 71);
-  ASSERT_LE(array[7], 100);
-  ASSERT_LE(array[8], 110);
-  ASSERT_LE(array[9], 123);
-  ASSERT_LE(array[10], 133);
-  ASSERT_LE(array[11], 135);
-  ASSERT_LE(array[12], 155);
-  ASSERT_LE(array[13], 170);
-  ASSERT_LE(array[14], 171);
-  ASSERT_LE(array[15], 1100);
-  ASSERT_LE(array[16], 1110);
-  ASSERT_LE(array[17], 1123);
-  ASSERT_LE(array[18], 1133);
-  ASSERT_LE(array[19], 1135);
-  ASSERT_LE(array[20], 1155);
-  ASSERT_LE(array[21], 1170);
-  ASSERT_LE(array[22], 1171);
-  ASSERT_LE(array[23], 11100);
-  ASSERT_LE(array[24], 12310);
-
-  // This is a sorted list, but there still have to have been at least N - 1
-  // comparisons made.
-  ASSERT_GE(count, ARRAY_SIZE - 1);
-}
-
-TEST(LlvmLibcQsortRTest, ReverseSortedArray) {
-  int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,
-                   12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1};
-  constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int);
-
-  size_t count = 0;
-
-  LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(int), int_compare_count,
-                          &count);
-
-  for (int i = 0; i < int(ARRAY_SIZE - 1); ++i)
-    ASSERT_LE(array[i], i + 1);
-
-  ASSERT_GE(count, ARRAY_SIZE);
-}
-
-// The following test is intended to mimic the CPP library pattern of having a
-// comparison function that takes a specific type, which is passed to a library
-// that then needs to sort an array of that type. The library can't safely pass
-// the comparison function to qsort because a function that takes const T*
-// being cast to a function that takes const void* is undefined behavior. The
-// safer pattern is to pass a type erased comparator that calls into the typed
-// comparator to qsort_r.
-
-struct PriorityVal {
-  int priority;
-  int size;
-};
-
-static int compare_priority_val(const PriorityVal *l, const PriorityVal *r) {
-  // Subtracting the priorities is unsafe, but it's fine for this test.
-  int priority_diff = l->priority - r->priority;
-  if (priority_diff != 0) {
-    return priority_diff;
-  }
-  if (l->size == r->size) {
-    return 0;
-  } else if (l->size > r->size) {
-    return 1;
-  } else {
-    return -1;
-  }
-}
-
-template <typename T>
-static int type_erased_comp(const void *l, const void *r,
-                            void *erased_func_ptr) {
-  typedef int (*TypedComp)(const T *, const T *);
-  TypedComp typed_func_ptr = reinterpret_cast<TypedComp>(erased_func_ptr);
-  const T *lt = reinterpret_cast<const T *>(l);
-  const T *rt = reinterpret_cast<const T *>(r);
-  return typed_func_ptr(lt, rt);
-}
-
-TEST(LlvmLibcQsortRTest, SafeTypeErasure) {
-  PriorityVal array[5] = {
-      {10, 3}, {1, 10}, {-1, 100}, {10, 0}, {3, 3},
-  };
-  constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(PriorityVal);
-
-  LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(PriorityVal),
-                          type_erased_comp<PriorityVal>,
-                          reinterpret_cast<void *>(compare_priority_val));
-
-  EXPECT_EQ(array[0].priority, -1);
-  EXPECT_EQ(array[0].size, 100);
-  EXPECT_EQ(array[1].priority, 1);
-  EXPECT_EQ(array[1].size, 10);
-  EXPECT_EQ(array[2].priority, 3);
-  EXPECT_EQ(array[2].size, 3);
-  EXPECT_EQ(array[3].priority, 10);
-  EXPECT_EQ(array[3].size, 0);
-  EXPECT_EQ(array[4].priority, 10);
-  EXPECT_EQ(array[4].size, 3);
-}
+QSORTREENTRANT_TEST(QsortR, LIBC_NAMESPACE::qsort_r, size_t)

>From 099b1f6aeec8058460598106ff0b68fab66b12dc Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Wed, 24 Jun 2026 09:34:01 +0100
Subject: [PATCH 04/42] [compiler-rt][ARM] Fix underflow handling in new
 divdf3.S (#204784)

The code which calculates the 'errsign' parameter to pass to
`__compiler_rt_dunder` was wrong in two ways. It calculated the value
with the wrong sign, and also in the wrong register, r12 rather than r2!
In this code's original context, both of those things made sense (the
'dunder' function had a nonstandard ABI). Somehow none of the existing
test cases detected the problem.

We found this bug in a test case downstream that only failed big-endian
(because that changes which half of the denominator mantissa is left in
r2 to be accidentally used as errsign). However, the new test cases here
are designed to detect the failure in both endiannesses.
---
 compiler-rt/lib/builtins/arm/divdf3.S          |  8 ++++----
 .../test/builtins/Unit/divdf3new_test.c        | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/arm/divdf3.S b/compiler-rt/lib/builtins/arm/divdf3.S
index ebbd868a04b17..213483e38ea13 100644
--- a/compiler-rt/lib/builtins/arm/divdf3.S
+++ b/compiler-rt/lib/builtins/arm/divdf3.S
@@ -497,11 +497,11 @@ LOCAL_LABEL(ddiv_underflow):
   // was rounded up), or negative if the quotient was rounded down. But we must
   // also distinguish the third case of the residual being exactly zero.
   add     xh, xh, #0x60000000     // apply IEEE 754 exponent bias for __dunder
-  orrs    r12, r6, r8             // set r12=0 and Z=1 if quotient was exact
-  movne   r12, #1                 // otherwise, set r12 = +1
-  orrne   r12, r12, r6, asr #31   // and change to -1 if residual is negative
+  orrs    r2, r6, r8              // set r2=0 and Z=1 if quotient was exact
+  mvnne   r2, r6, asr #31         // otherwise, r2 = {-1,0} for {+,-} residual
+  orrne   r2, r2, #1              // and then turn that into {-1,+1}
   pop     {r4,r5,r6,r7,r8,lr}     // pop all locally saved registers
-  b       SYMBOL_NAME(__compiler_rt_dunder)                // and tailcall __dunder to finish
+  b       SYMBOL_NAME(__compiler_rt_dunder) // and tailcall dunder to finish
 
 LOCAL_LABEL(ddiv_zerodenorm):
   // We come here if either input had exponent 0, so there's at least one zero
diff --git a/compiler-rt/test/builtins/Unit/divdf3new_test.c b/compiler-rt/test/builtins/Unit/divdf3new_test.c
index 866c7cb08e519..8fd22951d62ae 100644
--- a/compiler-rt/test/builtins/Unit/divdf3new_test.c
+++ b/compiler-rt/test/builtins/Unit/divdf3new_test.c
@@ -107,6 +107,24 @@ int main(void) {
       test__divdf3(0x0000000000000009, 0x4022000000000000, 0x0000000000000001);
   status |=
       test__divdf3(0x0000000000000009, 0xc022000000000000, 0x8000000000000001);
+  status |=
+      test__divdf3(0x0008000000000092, 0x4010000000000000, 0x0002000000000024);
+  status |=
+      test__divdf3(0x0010000000000008, 0x4030000040000000, 0x0000fffffc000010);
+  status |=
+      test__divdf3(0x0010000000000008, 0x4030000080000000, 0x0000fffff8000040);
+  status |=
+      test__divdf3(0x0010000000000018, 0x4030000040000000, 0x0000fffffc000011);
+  status |=
+      test__divdf3(0x0010000000000018, 0x4030000080000000, 0x0000fffff8000041);
+  status |=
+      test__divdf3(0x0010000001000008, 0x401fffff80000000, 0x0002000008200022);
+  status |=
+      test__divdf3(0x0010000001000010, 0x401fffff80000000, 0x0002000008200023);
+  status |=
+      test__divdf3(0x001000000f00000a, 0x401fffff40000000, 0x000200000de00055);
+  status |=
+      test__divdf3(0x001000000f000012, 0x401fffff40000000, 0x000200000de00056);
   status |=
       test__divdf3(0x000ffffffffffff7, 0x3feffffffffffffe, 0x000ffffffffffff8);
   status |=

>From a71292356656e0c516019aa6b571baeb572a7624 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 24 Jun 2026 11:17:03 +0200
Subject: [PATCH 05/42] [libc] Add IPv4 socket options and related structs
 (#204787)

This patch adds struct ip_mreq, ip_mreq_source, ip_mreqn, ip_opts, and
ip_msfilter to <netinet/in.h>, along with IP level socket option macros
(IP_TOS, IP_TTL, IP_ADD_MEMBERSHIP, etc.).

I add basic unit tests verifying the size and member offsets of the new
structures against standard layout expectations, mainly to make sure
that the files are used /somewhere/.

Assisted by Gemini.
---
 libc/hdr/types/CMakeLists.txt                 | 45 +++++++++++
 libc/hdr/types/struct_ip_mreq.h               | 26 ++++++
 libc/hdr/types/struct_ip_mreq_source.h        | 26 ++++++
 libc/hdr/types/struct_ip_mreqn.h              | 26 ++++++
 libc/hdr/types/struct_ip_msfilter.h           | 26 ++++++
 libc/hdr/types/struct_ip_opts.h               | 26 ++++++
 libc/include/CMakeLists.txt                   |  5 ++
 libc/include/llvm-libc-types/CMakeLists.txt   |  5 ++
 libc/include/llvm-libc-types/struct_ip_mreq.h | 24 ++++++
 .../llvm-libc-types/struct_ip_mreq_source.h   | 25 ++++++
 .../include/llvm-libc-types/struct_ip_mreqn.h | 25 ++++++
 .../llvm-libc-types/struct_ip_msfilter.h      | 28 +++++++
 libc/include/llvm-libc-types/struct_ip_opts.h | 24 ++++++
 libc/include/netinet/in.yaml                  | 81 +++++++++++++++++++
 libc/test/src/netinet/CMakeLists.txt          |  5 ++
 libc/test/src/netinet/in_test.cpp             | 38 +++++++++
 16 files changed, 435 insertions(+)
 create mode 100644 libc/hdr/types/struct_ip_mreq.h
 create mode 100644 libc/hdr/types/struct_ip_mreq_source.h
 create mode 100644 libc/hdr/types/struct_ip_mreqn.h
 create mode 100644 libc/hdr/types/struct_ip_msfilter.h
 create mode 100644 libc/hdr/types/struct_ip_opts.h
 create mode 100644 libc/include/llvm-libc-types/struct_ip_mreq.h
 create mode 100644 libc/include/llvm-libc-types/struct_ip_mreq_source.h
 create mode 100644 libc/include/llvm-libc-types/struct_ip_mreqn.h
 create mode 100644 libc/include/llvm-libc-types/struct_ip_msfilter.h
 create mode 100644 libc/include/llvm-libc-types/struct_ip_opts.h

diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index 1742e423b1d00..a130f7ee0000a 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -455,6 +455,51 @@ add_proxy_header_library(
     libc.include.netinet_in
 )
 
+add_proxy_header_library(
+  struct_ip_mreq
+  HDRS
+    struct_ip_mreq.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_ip_mreq
+    libc.include.netinet_in
+)
+
+add_proxy_header_library(
+  struct_ip_mreq_source
+  HDRS
+    struct_ip_mreq_source.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_ip_mreq_source
+    libc.include.netinet_in
+)
+
+add_proxy_header_library(
+  struct_ip_mreqn
+  HDRS
+    struct_ip_mreqn.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_ip_mreqn
+    libc.include.netinet_in
+)
+
+add_proxy_header_library(
+  struct_ip_msfilter
+  HDRS
+    struct_ip_msfilter.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_ip_msfilter
+    libc.include.netinet_in
+)
+
+add_proxy_header_library(
+  struct_ip_opts
+  HDRS
+    struct_ip_opts.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.struct_ip_opts
+    libc.include.netinet_in
+)
+
 add_proxy_header_library(
   in_addr_t
   HDRS
diff --git a/libc/hdr/types/struct_ip_mreq.h b/libc/hdr/types/struct_ip_mreq.h
new file mode 100644
index 0000000000000..9799f892bf7ef
--- /dev/null
+++ b/libc/hdr/types/struct_ip_mreq.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for struct ip_mreq.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_ip_mreq.h"
+
+#else
+
+#include <netinet/in.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H
diff --git a/libc/hdr/types/struct_ip_mreq_source.h b/libc/hdr/types/struct_ip_mreq_source.h
new file mode 100644
index 0000000000000..d19fd6cd19132
--- /dev/null
+++ b/libc/hdr/types/struct_ip_mreq_source.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for struct ip_mreq_source.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_ip_mreq_source.h"
+
+#else
+
+#include <netinet/in.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H
diff --git a/libc/hdr/types/struct_ip_mreqn.h b/libc/hdr/types/struct_ip_mreqn.h
new file mode 100644
index 0000000000000..9672e2408f705
--- /dev/null
+++ b/libc/hdr/types/struct_ip_mreqn.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for struct ip_mreqn.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_ip_mreqn.h"
+
+#else
+
+#include <netinet/in.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H
diff --git a/libc/hdr/types/struct_ip_msfilter.h b/libc/hdr/types/struct_ip_msfilter.h
new file mode 100644
index 0000000000000..7434815d95fd3
--- /dev/null
+++ b/libc/hdr/types/struct_ip_msfilter.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for struct ip_msfilter.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_ip_msfilter.h"
+
+#else
+
+#include <netinet/in.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H
diff --git a/libc/hdr/types/struct_ip_opts.h b/libc/hdr/types/struct_ip_opts.h
new file mode 100644
index 0000000000000..44f922f941da0
--- /dev/null
+++ b/libc/hdr/types/struct_ip_opts.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for struct ip_opts.
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H
+#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/struct_ip_opts.h"
+
+#else
+
+#include <netinet/in.h>
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index ab24c854692d0..bb670b614742a 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -235,6 +235,11 @@ add_header_macro(
     .llvm-libc-types.struct_sockaddr_in6
     .llvm-libc-types.struct_in_addr
     .llvm-libc-types.struct_in6_addr
+    .llvm-libc-types.struct_ip_mreq
+    .llvm-libc-types.struct_ip_mreq_source
+    .llvm-libc-types.struct_ip_mreqn
+    .llvm-libc-types.struct_ip_msfilter
+    .llvm-libc-types.struct_ip_opts
     .llvm_libc_common_h
 )
 
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index c0c600983ba12..0512d3f0e642a 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -68,6 +68,11 @@ add_header(in_addr_t HDR in_addr_t.h)
 add_header(in_port_t HDR in_port_t.h DEPENDS libc.include.llvm-libc-macros.stdint_macros)
 add_header(struct_in_addr HDR struct_in_addr.h DEPENDS .in_addr_t)
 add_header(struct_in6_addr HDR struct_in6_addr.h DEPENDS libc.include.llvm-libc-macros.stdint_macros)
+add_header(struct_ip_mreq HDR struct_ip_mreq.h DEPENDS .struct_in_addr)
+add_header(struct_ip_mreq_source HDR struct_ip_mreq_source.h DEPENDS .struct_in_addr)
+add_header(struct_ip_mreqn HDR struct_ip_mreqn.h DEPENDS .struct_in_addr)
+add_header(struct_ip_msfilter HDR struct_ip_msfilter.h DEPENDS libc.include.llvm-libc-macros.stdint_macros .struct_in_addr)
+add_header(struct_ip_opts HDR struct_ip_opts.h DEPENDS .struct_in_addr)
 add_header(ino_t HDR ino_t.h)
 add_header(key_t HDR key_t.h)
 add_header(mbstate_t HDR mbstate_t.h)
diff --git a/libc/include/llvm-libc-types/struct_ip_mreq.h b/libc/include/llvm-libc-types/struct_ip_mreq.h
new file mode 100644
index 0000000000000..614a38d8d95c6
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_ip_mreq.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of struct ip_mreq.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H
+#define LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H
+
+#include "struct_in_addr.h"
+
+struct ip_mreq {
+  struct in_addr imr_multiaddr;
+  struct in_addr imr_interface;
+};
+
+#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H
diff --git a/libc/include/llvm-libc-types/struct_ip_mreq_source.h b/libc/include/llvm-libc-types/struct_ip_mreq_source.h
new file mode 100644
index 0000000000000..565c92ba43c7d
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_ip_mreq_source.h
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of struct ip_mreq_source.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H
+#define LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H
+
+#include "struct_in_addr.h"
+
+struct ip_mreq_source {
+  struct in_addr imr_multiaddr;
+  struct in_addr imr_interface;
+  struct in_addr imr_sourceaddr;
+};
+
+#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H
diff --git a/libc/include/llvm-libc-types/struct_ip_mreqn.h b/libc/include/llvm-libc-types/struct_ip_mreqn.h
new file mode 100644
index 0000000000000..7fa7aaa2dd09b
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_ip_mreqn.h
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of struct ip_mreqn.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H
+#define LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H
+
+#include "struct_in_addr.h"
+
+struct ip_mreqn {
+  struct in_addr imr_multiaddr;
+  struct in_addr imr_address;
+  int imr_ifindex;
+};
+
+#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H
diff --git a/libc/include/llvm-libc-types/struct_ip_msfilter.h b/libc/include/llvm-libc-types/struct_ip_msfilter.h
new file mode 100644
index 0000000000000..6a5c4097cafcd
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_ip_msfilter.h
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of struct ip_msfilter.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H
+#define LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H
+
+#include "../llvm-libc-macros/stdint-macros.h"
+#include "struct_in_addr.h"
+
+struct ip_msfilter {
+  struct in_addr imsf_multiaddr;
+  struct in_addr imsf_interface;
+  uint32_t imsf_fmode;
+  uint32_t imsf_numsrc;
+  struct in_addr imsf_slist[1]; // Variable size.
+};
+
+#endif // LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H
diff --git a/libc/include/llvm-libc-types/struct_ip_opts.h b/libc/include/llvm-libc-types/struct_ip_opts.h
new file mode 100644
index 0000000000000..18c6d638cbcd4
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_ip_opts.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of struct ip_opts.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H
+#define LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H
+
+#include "struct_in_addr.h"
+
+struct ip_opts {
+  struct in_addr ip_dst;
+  char ip_opts[40]; // Variable size.
+};
+
+#endif // LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H
diff --git a/libc/include/netinet/in.yaml b/libc/include/netinet/in.yaml
index e4eb7d65bb42d..0244b160cb601 100644
--- a/libc/include/netinet/in.yaml
+++ b/libc/include/netinet/in.yaml
@@ -24,6 +24,82 @@ macros:
     macro_header: inet-address-macros.h
   - macro_name: INET6_ADDRSTRLEN
     macro_header: inet-address-macros.h
+
+  - macro_name: SOL_IP
+    macro_value: 0
+
+  # IP level socket options
+  - macro_name: IP_TOS
+    macro_value: 1 # int
+  - macro_name: IP_TTL
+    macro_value: 2 # int
+  - macro_name: IP_HDRINCL
+    macro_value: 3 # int
+  - macro_name: IP_OPTIONS
+    macro_value: 4 # struct ip_opts
+  - macro_name: IP_ROUTER_ALERT
+    macro_value: 5 # int
+  - macro_name: IP_RECVOPTS
+    macro_value: 6 # int
+  - macro_name: IP_RETOPTS
+    macro_value: 7 # int
+  - macro_name: IP_PKTINFO
+    macro_value: 8 # int
+  - macro_name: IP_MTU_DISCOVER
+    macro_value: 10 # int
+  - macro_name: IP_PMTUDISC_DONT
+    macro_value: 0
+  - macro_name: IP_PMTUDISC_WANT
+    macro_value: 1
+  - macro_name: IP_PMTUDISC_DO
+    macro_value: 2
+  - macro_name: IP_PMTUDISC_PROBE
+    macro_value: 3
+  - macro_name: IP_RECVERR
+    macro_value: 11 # int
+  - macro_name: IP_RECVTTL
+    macro_value: 12 # int
+  - macro_name: IP_RECVTOS
+    macro_value: 13 # int
+  - macro_name: IP_MTU
+    macro_value: 14 # int
+  - macro_name: IP_FREEBIND
+    macro_value: 15 # int
+  - macro_name: IP_PASSSEC
+    macro_value: 18 # int
+  - macro_name: IP_TRANSPARENT
+    macro_value: 19 # int
+  - macro_name: IP_RECVORIGDSTADDR
+    macro_value: 20 # int
+  - macro_name: IP_NODEFRAG
+    macro_value: 22 # int
+  - macro_name: IP_BIND_ADDRESS_NO_PORT
+    macro_value: 24 # int
+  - macro_name: IP_MULTICAST_IF
+    macro_value: 32 # struct in_addr or ip_mreq or ip_mreqn
+  - macro_name: IP_MULTICAST_TTL
+    macro_value: 33 # int
+  - macro_name: IP_MULTICAST_LOOP
+    macro_value: 34 # int
+  - macro_name: IP_ADD_MEMBERSHIP
+    macro_value: 35 # struct ip_mreq or ip_mreqn
+  - macro_name: IP_DROP_MEMBERSHIP
+    macro_value: 36 # struct ip_mreq or ip_mreqn
+  - macro_name: IP_UNBLOCK_SOURCE
+    macro_value: 37 # struct ip_mreq_source
+  - macro_name: IP_BLOCK_SOURCE
+    macro_value: 38 # struct ip_mreq_source
+  - macro_name: IP_ADD_SOURCE_MEMBERSHIP
+    macro_value: 39 # struct ip_mreq_source
+  - macro_name: IP_DROP_SOURCE_MEMBERSHIP
+    macro_value: 40 # struct ip_mreq_source
+  - macro_name: IP_MSFILTER
+    macro_value: 41 # struct ip_msfilter
+  - macro_name: IP_MULTICAST_ALL
+    macro_value: 49 # int
+  - macro_name: IP_LOCAL_PORT_RANGE
+    macro_value: 51 # uint32_t
+
 types:
   - type_name: in_port_t
   - type_name: in_addr_t
@@ -32,6 +108,11 @@ types:
   - type_name: struct_sockaddr_in6
   - type_name: struct_in_addr
   - type_name: struct_in6_addr
+  - type_name: struct_ip_mreq
+  - type_name: struct_ip_mreq_source
+  - type_name: struct_ip_mreqn
+  - type_name: struct_ip_msfilter
+  - type_name: struct_ip_opts
 enums: []
 objects:
   - object_name: in6addr_any
diff --git a/libc/test/src/netinet/CMakeLists.txt b/libc/test/src/netinet/CMakeLists.txt
index 605f70ff193cd..f1adcf0f56f17 100644
--- a/libc/test/src/netinet/CMakeLists.txt
+++ b/libc/test/src/netinet/CMakeLists.txt
@@ -9,6 +9,11 @@ add_libc_unittest(
   DEPENDS
     libc.hdr.netinet_in_macros
     libc.hdr.types.struct_in6_addr
+    libc.hdr.types.struct_ip_mreq
+    libc.hdr.types.struct_ip_mreq_source
+    libc.hdr.types.struct_ip_mreqn
+    libc.hdr.types.struct_ip_msfilter
+    libc.hdr.types.struct_ip_opts
     libc.hdr.types.struct_sockaddr_in6
     libc.src.arpa.inet.htons
     libc.src.arpa.inet.htonl
diff --git a/libc/test/src/netinet/in_test.cpp b/libc/test/src/netinet/in_test.cpp
index fb4c3be335605..eccfdf37d5514 100644
--- a/libc/test/src/netinet/in_test.cpp
+++ b/libc/test/src/netinet/in_test.cpp
@@ -18,6 +18,11 @@
 
 #include "hdr/netinet_in_macros.h"
 #include "hdr/types/struct_in6_addr.h"
+#include "hdr/types/struct_ip_mreq.h"
+#include "hdr/types/struct_ip_mreq_source.h"
+#include "hdr/types/struct_ip_mreqn.h"
+#include "hdr/types/struct_ip_msfilter.h"
+#include "hdr/types/struct_ip_opts.h"
 #include "hdr/types/struct_sockaddr_in6.h"
 #include "src/netinet/in6addr_any.h"
 #include "src/netinet/in6addr_loopback.h"
@@ -92,3 +97,36 @@ TEST(LlvmLibcNetinetInTest, SockaddrIn6Layout) {
             static_cast<size_t>(24));
   EXPECT_EQ(sizeof(struct sockaddr_in6), static_cast<size_t>(28));
 }
+
+TEST(LlvmLibcNetinetInTest, IpOptionLayout) {
+  EXPECT_EQ(sizeof(struct ip_mreq), static_cast<size_t>(8));
+  EXPECT_EQ(sizeof(struct ip_mreq_source), static_cast<size_t>(12));
+  EXPECT_EQ(sizeof(struct ip_mreqn), static_cast<size_t>(12));
+  EXPECT_EQ(sizeof(struct ip_msfilter), static_cast<size_t>(20));
+  EXPECT_EQ(sizeof(struct ip_opts), static_cast<size_t>(44));
+
+  EXPECT_EQ(offsetof(struct ip_mreq, imr_multiaddr), static_cast<size_t>(0));
+  EXPECT_EQ(offsetof(struct ip_mreq, imr_interface), static_cast<size_t>(4));
+
+  EXPECT_EQ(offsetof(struct ip_mreq_source, imr_multiaddr),
+            static_cast<size_t>(0));
+  EXPECT_EQ(offsetof(struct ip_mreq_source, imr_interface),
+            static_cast<size_t>(4));
+  EXPECT_EQ(offsetof(struct ip_mreq_source, imr_sourceaddr),
+            static_cast<size_t>(8));
+
+  EXPECT_EQ(offsetof(struct ip_mreqn, imr_multiaddr), static_cast<size_t>(0));
+  EXPECT_EQ(offsetof(struct ip_mreqn, imr_address), static_cast<size_t>(4));
+  EXPECT_EQ(offsetof(struct ip_mreqn, imr_ifindex), static_cast<size_t>(8));
+
+  EXPECT_EQ(offsetof(struct ip_msfilter, imsf_multiaddr),
+            static_cast<size_t>(0));
+  EXPECT_EQ(offsetof(struct ip_msfilter, imsf_interface),
+            static_cast<size_t>(4));
+  EXPECT_EQ(offsetof(struct ip_msfilter, imsf_fmode), static_cast<size_t>(8));
+  EXPECT_EQ(offsetof(struct ip_msfilter, imsf_numsrc), static_cast<size_t>(12));
+  EXPECT_EQ(offsetof(struct ip_msfilter, imsf_slist), static_cast<size_t>(16));
+
+  EXPECT_EQ(offsetof(struct ip_opts, ip_dst), static_cast<size_t>(0));
+  EXPECT_EQ(offsetof(struct ip_opts, ip_opts), static_cast<size_t>(4));
+}

>From a562f6a4833eb15d8d2fdbd54c27c595935ff275 Mon Sep 17 00:00:00 2001
From: jeanPerier <jperier at nvidia.com>
Date: Wed, 24 Jun 2026 11:35:28 +0200
Subject: [PATCH 06/42] [flang][FIR] add canonicalization pattern for fir.if
 returning OPTIONAL (#205353)

When forwarding OPTIONAL arguments in calls, lowering generates patterns
that look like:

```
    %present = fir.is_present %var : (T) -> i1
    %if_result = fir.if %present -> (T) {
      fir.result %var : T
    } else {
      %absent = fir.absent T
      fir.result %absent : T
    }
```

This specific pattern is a no-op and `%var` can be used directly. The
lowering logic that generates such patterns lives in non-trivial
compiler code that also has to handle scenarios where the code inside
the fir.if is more involved. Add a FIR pattern to canonicalize such
code to help with later analysis (like aliasing).
---
 .../include/flang/Optimizer/Dialect/FIROps.td |  2 +
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 59 +++++++++++++++++
 flang/test/Fir/present-absent-if-fold.fir     | 66 +++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 flang/test/Fir/present-absent-if-fold.fir

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 35a87c29d0cb6..c6e4d1b3b4d11 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2560,6 +2560,8 @@ def fir_IfOp
         "bool":$withElseRegion)>
   ];
 
+  let hasCanonicalizer = 1;
+
   let extraClassDeclaration = [{
     mlir::OpBuilder getThenBodyBuilder() {
       assert(!getThenRegion().empty() && "Unexpected empty 'where' region.");
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 1d61989eba6cf..ba047e71d6aa3 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -5690,6 +5690,65 @@ void fir::IfOp::resultToSourceOps(llvm::SmallVectorImpl<mlir::Value> &results,
     results.push_back(term->getOperand(resultNum));
 }
 
+// Fold away a fir.if that only forwards an optional argument or returns
+// fir.absent when it is not present:
+//
+//   %present = fir.is_present %var : (T) -> i1
+//   %r = fir.if %present -> (T) {
+//     fir.result %var : T
+//   } else {
+//     %absent = fir.absent T
+//     fir.result %absent : T
+//   }
+//
+// The result is always %var: optional arguments already encode presence.
+struct FoldPresentAbsentIfOp : public mlir::OpRewritePattern<fir::IfOp> {
+  using mlir::OpRewritePattern<fir::IfOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::IfOp ifOp,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (ifOp.getNumResults() != 1)
+      return mlir::failure();
+
+    auto isPresentOp = ifOp.getCondition().getDefiningOp<fir::IsPresentOp>();
+    if (!isPresentOp)
+      return mlir::failure();
+
+    mlir::Value optionalVal = isPresentOp.getVal();
+    mlir::Type resultType = ifOp.getResult(0).getType();
+    if (optionalVal.getType() != resultType)
+      return mlir::failure();
+
+    mlir::Block &thenBlock = ifOp.getThenRegion().front();
+    if (thenBlock.getOperations().size() != 1)
+      return mlir::failure();
+    auto thenResult = mlir::dyn_cast<fir::ResultOp>(thenBlock.getTerminator());
+    if (!thenResult || thenResult.getNumOperands() != 1 ||
+        thenResult.getOperand(0) != optionalVal)
+      return mlir::failure();
+
+    if (ifOp.getElseRegion().empty())
+      return mlir::failure();
+    mlir::Block &elseBlock = ifOp.getElseRegion().front();
+    if (elseBlock.getOperations().size() > 2)
+      return mlir::failure();
+    auto elseResult = mlir::dyn_cast<fir::ResultOp>(elseBlock.getTerminator());
+    if (!elseResult || elseResult.getNumOperands() != 1)
+      return mlir::failure();
+    if (!elseResult.getOperand(0).getDefiningOp<fir::AbsentOp>())
+      return mlir::failure();
+
+    rewriter.replaceOp(ifOp, optionalVal);
+    return mlir::success();
+  }
+};
+
+void fir::IfOp::getCanonicalizationPatterns(mlir::RewritePatternSet &patterns,
+                                            mlir::MLIRContext *context) {
+  patterns.add<FoldPresentAbsentIfOp>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // BoxOffsetOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/test/Fir/present-absent-if-fold.fir b/flang/test/Fir/present-absent-if-fold.fir
new file mode 100644
index 0000000000000..19fd7859f4a23
--- /dev/null
+++ b/flang/test/Fir/present-absent-if-fold.fir
@@ -0,0 +1,66 @@
+// RUN: fir-opt --canonicalize %s | FileCheck %s
+
+// CHECK-LABEL: func.func @fold_present_absent_if_box(
+// CHECK-SAME:     %[[VAR:.*]]: !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {
+// CHECK-NOT: fir.if
+// CHECK-NOT: fir.is_present
+// CHECK-NOT: fir.absent
+// CHECK: return %[[VAR]] : !fir.box<!fir.array<?xf32>>
+func.func @fold_present_absent_if_box(
+    %var: !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {
+  %present = fir.is_present %var : (!fir.box<!fir.array<?xf32>>) -> i1
+  %if_result = fir.if %present -> (!fir.box<!fir.array<?xf32>>) {
+    fir.result %var : !fir.box<!fir.array<?xf32>>
+  } else {
+    %absent = fir.absent !fir.box<!fir.array<?xf32>>
+    fir.result %absent : !fir.box<!fir.array<?xf32>>
+  }
+  return %if_result : !fir.box<!fir.array<?xf32>>
+}
+
+// CHECK-LABEL: func.func @fold_present_absent_if_ref(
+// CHECK-SAME:     %[[VAR:.*]]: !fir.ref<i32>) -> !fir.ref<i32> {
+// CHECK-NOT: fir.if
+// CHECK: return %[[VAR]] : !fir.ref<i32>
+func.func @fold_present_absent_if_ref(%var: !fir.ref<i32>) -> !fir.ref<i32> {
+  %present = fir.is_present %var : (!fir.ref<i32>) -> i1
+  %if_result = fir.if %present -> (!fir.ref<i32>) {
+    fir.result %var : !fir.ref<i32>
+  } else {
+    %absent = fir.absent !fir.ref<i32>
+    fir.result %absent : !fir.ref<i32>
+  }
+  return %if_result : !fir.ref<i32>
+}
+
+func.func private @side_effect() -> ()
+
+// CHECK-LABEL: func.func @no_fold_call_in_then(
+// CHECK: fir.is_present
+// CHECK: fir.if
+func.func @no_fold_call_in_then(%var: !fir.ref<i32>) -> !fir.ref<i32> {
+  %present = fir.is_present %var : (!fir.ref<i32>) -> i1
+  %if_result = fir.if %present -> (!fir.ref<i32>) {
+    fir.call @side_effect() : () -> ()
+    fir.result %var : !fir.ref<i32>
+  } else {
+    %absent = fir.absent !fir.ref<i32>
+    fir.result %absent : !fir.ref<i32>
+  }
+  return %if_result : !fir.ref<i32>
+}
+
+// CHECK-LABEL: func.func @no_fold_call_in_else(
+// CHECK: fir.is_present
+// CHECK: fir.if
+func.func @no_fold_call_in_else(%var: !fir.ref<i32>) -> !fir.ref<i32> {
+  %present = fir.is_present %var : (!fir.ref<i32>) -> i1
+  %if_result = fir.if %present -> (!fir.ref<i32>) {
+    fir.result %var : !fir.ref<i32>
+  } else {
+    %absent = fir.absent !fir.ref<i32>
+    fir.call @side_effect() : () -> ()
+    fir.result %absent : !fir.ref<i32>
+  }
+  return %if_result : !fir.ref<i32>
+}

>From 35cfa14e389e72b1b852d15c7cfc87172a5d4e4b Mon Sep 17 00:00:00 2001
From: Hristo Hristov <hghristov.rmm at gmail.com>
Date: Wed, 24 Jun 2026 12:48:55 +0300
Subject: [PATCH 07/42] [libc++][ranges] Applied `[[nodiscard]]` to
 `reverse_view` (#205186)

Towards #172124

# References:
- https://wg21.link/range.reverse
- https://libcxx.llvm.org/CodingGuidelines.html#apply-nodiscard-where-relevant
---
 libcxx/include/__ranges/reverse_view.h        | 16 +--
 .../adaptor.nodiscard.verify.cpp              | 21 ----
 .../range.reverse/nodiscard.verify.cpp        | 99 +++++++++++++++++++
 3 files changed, 107 insertions(+), 29 deletions(-)
 delete mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp
 create mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp

diff --git a/libcxx/include/__ranges/reverse_view.h b/libcxx/include/__ranges/reverse_view.h
index c36ba77dd8f6d..b016cc231f2b5 100644
--- a/libcxx/include/__ranges/reverse_view.h
+++ b/libcxx/include/__ranges/reverse_view.h
@@ -59,13 +59,13 @@ class reverse_view : public view_interface<reverse_view<_View>> {
 
   _LIBCPP_HIDE_FROM_ABI constexpr explicit reverse_view(_View __view) : __base_(std::move(__view)) {}
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const&
     requires copy_constructible<_View>
   {
     return __base_;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); }
 
   _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator<iterator_t<_View>> begin() {
     if constexpr (_UseCache)
@@ -78,35 +78,35 @@ class reverse_view : public view_interface<reverse_view<_View>> {
     return __tmp;
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator<iterator_t<_View>> begin()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator<iterator_t<_View>> begin()
     requires common_range<_View>
   {
     return std::make_reverse_iterator(ranges::end(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const
     requires common_range<const _View>
   {
     return std::make_reverse_iterator(ranges::end(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator<iterator_t<_View>> end() {
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator<iterator_t<_View>> end() {
     return std::make_reverse_iterator(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const
     requires common_range<const _View>
   {
     return std::make_reverse_iterator(ranges::begin(__base_));
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size()
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size()
     requires sized_range<_View>
   {
     return ranges::size(__base_);
   }
 
-  _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const
     requires sized_range<const _View>
   {
     return ranges::size(__base_);
diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp
deleted file mode 100644
index 2f7eb94611928..0000000000000
--- a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-
-// Test the libc++ extension that std::views::reverse is marked as [[nodiscard]].
-
-#include <ranges>
-
-void test() {
-  int range[] = {1, 2, 3};
-
-  std::views::reverse(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
-  range | std::views::reverse; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
-  std::views::all | std::views::reverse; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
-}
diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp
new file mode 100644
index 0000000000000..6dfa2f5d17c60
--- /dev/null
+++ b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp
@@ -0,0 +1,99 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: std-at-least-c++20
+
+// Check that functions are marked [[nodiscard]]
+
+#include <ranges>
+#include <utility>
+
+#include "test_iterators.h"
+
+void test() {
+  int range[] = {19, 28, 29, 49, 82, 94};
+  auto v      = std::views::reverse(range);
+
+  // [range.reverse.view]
+
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  v.base();
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::move(v).base();
+
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  v.begin();
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::as_const(v).begin();
+
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  v.end();
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::as_const(v).end();
+
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  v.size();
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::as_const(v).size();
+
+  // [range.reverse.overview]
+
+  // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+  std::views::reverse(std::views::reverse(range));
+
+  struct BidirRange : std::ranges::view_base {
+    int* begin_;
+    int* end_;
+
+    constexpr BidirRange(int* b, int* e) : begin_(b), end_(e) {}
+
+    constexpr bidirectional_iterator<int*> begin() { return bidirectional_iterator<int*>{begin_}; }
+    constexpr bidirectional_iterator<const int*> begin() const { return bidirectional_iterator<const int*>{begin_}; }
+    constexpr bidirectional_iterator<int*> end() { return bidirectional_iterator<int*>{end_}; }
+    constexpr bidirectional_iterator<const int*> end() const { return bidirectional_iterator<const int*>{end_}; }
+  };
+  static_assert(std::ranges::bidirectional_range<BidirRange>);
+  static_assert(std::ranges::common_range<BidirRange>);
+  static_assert(std::ranges::view<BidirRange>);
+  static_assert(std::copyable<BidirRange>);
+
+  { // views::reverse(x) is equivalent to subrange{end, begin, size} if x is a
+    // sized subrange over reverse iterators
+    using It       = bidirectional_iterator<int*>;
+    using Subrange = std::ranges::subrange<It, It, std::ranges::subrange_kind::sized>;
+
+    using ReverseIt       = std::reverse_iterator<It>;
+    using ReverseSubrange = std::ranges::subrange<ReverseIt, ReverseIt, std::ranges::subrange_kind::sized>;
+
+    BidirRange view(range, range + 6);
+    ReverseSubrange subrange(ReverseIt(std::ranges::end(view)), ReverseIt(std::ranges::begin(view)), /* size */ 6);
+
+    // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+    std::views::reverse(subrange);
+  }
+  { // views::reverse(x) is equivalent to subrange{end, begin} if x is an
+    // unsized subrange over reverse iterators
+    using It       = bidirectional_iterator<int*>;
+    using Subrange = std::ranges::subrange<It, It, std::ranges::subrange_kind::unsized>;
+
+    using ReverseIt       = std::reverse_iterator<It>;
+    using ReverseSubrange = std::ranges::subrange<ReverseIt, ReverseIt, std::ranges::subrange_kind::unsized>;
+
+    BidirRange view(range, range + 6);
+    ReverseSubrange subrange(ReverseIt(std::ranges::end(view)), ReverseIt(std::ranges::begin(view)));
+
+    // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+    std::views::reverse(subrange);
+  }
+  { // Otherwise, views::reverse(x) is equivalent to ranges::reverse_view{x}
+    BidirRange view(range, range + 6);
+
+    // expected-warning at +1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+    std::views::reverse(view);
+  }
+}

>From 34bdee76b8577302e2e04fe553ab03579e764069 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Wed, 24 Jun 2026 11:50:30 +0200
Subject: [PATCH 08/42] Remove unused variables in the monorepo (#204994)

https://github.com/llvm/llvm-project/pull/203084 adds diagnostics about
unused variables to the libc++ containers. This patch is the fallout
from the projects I tried to build with it.
---
 clang-tools-extra/clangd/IncludeFixer.cpp              |  7 -------
 clang-tools-extra/modularize/PreprocessorTracker.cpp   |  3 ---
 clang/lib/Driver/ToolChains/Hexagon.cpp                |  1 -
 .../Checkers/StdLibraryFunctionsChecker.cpp            |  4 ----
 clang/unittests/AST/ASTImporterFixtures.cpp            |  2 --
 clang/unittests/Interpreter/CodeCompletionTest.cpp     |  2 +-
 clang/unittests/Tooling/HeaderIncludesTest.cpp         |  1 -
 lldb/source/Commands/CommandObjectCommands.cpp         |  3 ---
 lldb/source/Commands/CommandObjectHelp.cpp             |  1 -
 lldb/source/Interpreter/CommandInterpreter.cpp         |  3 ---
 .../Hexagon-DYLD/HexagonDYLDRendezvous.cpp             |  2 --
 .../DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp        |  2 --
 .../GDBRemoteCommunicationServerPlatform.cpp           |  5 +----
 lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp |  2 --
 lldb/source/Target/StackFrameList.cpp                  |  1 -
 lldb/tools/yaml2macho-core/yaml2macho.cpp              |  2 --
 .../Orc/TargetProcess/SimpleRemoteEPCServer.h          |  1 -
 llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h   |  8 ++++----
 llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp             |  2 +-
 llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp |  4 ----
 llvm/tools/llvm-rc/ResourceScriptParser.cpp            |  3 +--
 llvm/tools/llvm-rc/ResourceScriptParser.h              |  1 -
 llvm/tools/sancov/sancov.cpp                           |  1 -
 llvm/unittests/ADT/STLExtrasTest.cpp                   |  4 ++--
 llvm/unittests/CodeGen/PassManagerTest.cpp             |  2 +-
 llvm/unittests/TextAPI/TextStubV3Tests.cpp             | 10 +---------
 llvm/unittests/TextAPI/TextStubV4Tests.cpp             |  4 ----
 .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp       |  1 -
 mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp            |  1 -
 29 files changed, 12 insertions(+), 71 deletions(-)

diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp
index 3f3d7fbefd58e..5ecf853524a3f 100644
--- a/clang-tools-extra/clangd/IncludeFixer.cpp
+++ b/clang-tools-extra/clangd/IncludeFixer.cpp
@@ -613,13 +613,6 @@ IncludeFixer::lookupCached(const SymbolID &ID) const {
   Index.lookup(Req, [&](const Symbol &Sym) { Matches.insert(Sym); });
   auto Syms = std::move(Matches).build();
 
-  std::vector<Fix> Fixes;
-  if (!Syms.empty()) {
-    auto &Matched = *Syms.begin();
-    if (!Matched.IncludeHeaders.empty() && Matched.Definition &&
-        Matched.CanonicalDeclaration.FileURI == Matched.Definition.FileURI)
-      Fixes = fixesForSymbols(Syms);
-  }
   auto E = LookupCache.try_emplace(ID, std::move(Syms));
   return &E.first->second;
 }
diff --git a/clang-tools-extra/modularize/PreprocessorTracker.cpp b/clang-tools-extra/modularize/PreprocessorTracker.cpp
index 04abb2733f5a7..6205b97a17176 100644
--- a/clang-tools-extra/modularize/PreprocessorTracker.cpp
+++ b/clang-tools-extra/modularize/PreprocessorTracker.cpp
@@ -1078,9 +1078,6 @@ class PreprocessorTrackerImpl : public PreprocessorTracker {
     auto I = ConditionalExpansions.find(InstanceKey);
     // If existing instance of condition not found, add one.
     if (I == ConditionalExpansions.end()) {
-      std::string InstanceSourceLine =
-          getSourceLocationString(PP, InstanceLoc) + ":\n" +
-          getSourceLine(PP, InstanceLoc) + "\n";
       ConditionalExpansions[InstanceKey] =
           ConditionalTracker(DirectiveKind, ConditionValue,
                              ConditionUnexpandedHandle, InclusionPathHandle);
diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp
index b671db98a7798..3643c0d4e526c 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -534,7 +534,6 @@ void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 std::string HexagonToolChain::getHexagonTargetDir(
       const std::string &InstalledDir,
       const SmallVectorImpl<std::string> &PrefixDirs) const {
-  std::string InstallRelDir;
   const Driver &D = getDriver();
 
   // Locate the rest of the toolchain ...
diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
index 50b34cb181ca5..4fe3e1f7623f6 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -2149,8 +2149,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
                   ErrnoIrrelevant)
             .ArgConstraint(NotNull(ArgNo(0))));
   } else {
-    const auto ReturnsZeroOrMinusOne =
-        ConstraintSet{ReturnValueCondition(WithinRange, Range(-1, 0))};
     const auto ReturnsZero =
         ConstraintSet{ReturnValueCondition(WithinRange, SingleValue(0))};
     const auto ReturnsMinusOne =
@@ -2161,8 +2159,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries(
         ConstraintSet{ReturnValueCondition(WithinRange, Range(0, IntMax))};
     const auto ReturnsNonZero =
         ConstraintSet{ReturnValueCondition(OutOfRange, SingleValue(0))};
-    const auto ReturnsFileDescriptor =
-        ConstraintSet{ReturnValueCondition(WithinRange, Range(-1, IntMax))};
     const auto &ReturnsValidFileDescriptor = ReturnsNonnegative;
 
     auto ValidFileDescriptorOrAtFdcwd = [&](ArgNo ArgN) {
diff --git a/clang/unittests/AST/ASTImporterFixtures.cpp b/clang/unittests/AST/ASTImporterFixtures.cpp
index e4e5a078262c3..48421fe90a431 100644
--- a/clang/unittests/AST/ASTImporterFixtures.cpp
+++ b/clang/unittests/AST/ASTImporterFixtures.cpp
@@ -142,7 +142,6 @@ std::tuple<Decl *, Decl *> ASTImporterTestBase::getImportedDecl(
     StringRef FromSrcCode, TestLanguage FromLang, StringRef ToSrcCode,
     TestLanguage ToLang, StringRef Identifier) {
   std::vector<std::string> FromArgs = getCommandLineArgsForLanguage(FromLang);
-  std::vector<std::string> ToArgs = getCommandLineArgsForLanguage(ToLang);
 
   FromTUs.emplace_back(FromSrcCode, InputFileName, FromArgs, Creator,
                        ODRHandling);
@@ -185,7 +184,6 @@ TranslationUnitDecl *ASTImporterTestBase::getTuDecl(StringRef SrcCode,
 
 TranslationUnitDecl *ASTImporterTestBase::getToTuDecl(StringRef ToSrcCode,
                                                       TestLanguage ToLang) {
-  std::vector<std::string> ToArgs = getCommandLineArgsForLanguage(ToLang);
   assert(!ToAST);
   lazyInitToAST(ToLang, ToSrcCode, OutputFileName);
   return ToAST->getASTContext().getTranslationUnitDecl();
diff --git a/clang/unittests/Interpreter/CodeCompletionTest.cpp b/clang/unittests/Interpreter/CodeCompletionTest.cpp
index ceb683497ac74..fcabb28092811 100644
--- a/clang/unittests/Interpreter/CodeCompletionTest.cpp
+++ b/clang/unittests/Interpreter/CodeCompletionTest.cpp
@@ -98,7 +98,7 @@ TEST_F(CodeCompletionTest, TwoDecls) {
 
 TEST_F(CodeCompletionTest, CompFunDeclsNoError) {
   auto Err = llvm::Error::success();
-  auto comps = runComp("void app(", Err);
+  runComp("void app(", Err);
   EXPECT_EQ((bool)Err, false);
 }
 
diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 8259394cb30bc..95fb05885a0b9 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -678,7 +678,6 @@ int main() {
     std::vector<int> ints {};
 })cpp";
 
-  auto InsertedCode = insert(Code, "<vector>");
   EXPECT_EQ(Expected, insert(Code, "<vector>"));
 }
 
diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp
index 84e661ec01f53..8f006768ecc9a 100644
--- a/lldb/source/Commands/CommandObjectCommands.cpp
+++ b/lldb/source/Commands/CommandObjectCommands.cpp
@@ -627,7 +627,6 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed {
 
 protected:
   void DoExecute(Args &args, CommandReturnObject &result) override {
-    CommandObject::CommandMap::iterator pos;
     CommandObject *cmd_obj;
 
     if (args.empty()) {
@@ -702,8 +701,6 @@ class CommandObjectCommandsDelete : public CommandObjectParsed {
 
 protected:
   void DoExecute(Args &args, CommandReturnObject &result) override {
-    CommandObject::CommandMap::iterator pos;
-
     if (args.empty()) {
       result.AppendErrorWithFormat("must call '%s' with one or more valid user "
                                    "defined regular expression command names",
diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp
index a29ded846b100..c96c5158c06f6 100644
--- a/lldb/source/Commands/CommandObjectHelp.cpp
+++ b/lldb/source/Commands/CommandObjectHelp.cpp
@@ -64,7 +64,6 @@ CommandObjectHelp::CommandOptions::GetDefinitions() {
 }
 
 void CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) {
-  CommandObject::CommandMap::iterator pos;
   CommandObject *cmd_obj;
   const size_t argc = command.GetArgumentCount();
 
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 0c5456c2c3b57..a35baa7fd6d27 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -2962,7 +2962,6 @@ void CommandInterpreter::HandleCommandsFromFile(
   auto input_file_up =
       FileSystem::Instance().Open(cmd_file, File::eOpenOptionReadOnly);
   if (!input_file_up) {
-    std::string error = llvm::toString(input_file_up.takeError());
     result.AppendErrorWithFormatv(
         "error: an error occurred read file '{0}': {1}\n", cmd_file_path,
         llvm::fmt_consume(input_file_up.takeError()));
@@ -3263,8 +3262,6 @@ void CommandInterpreter::FindCommandsForApropos(llvm::StringRef search_word,
                                                 bool search_user_commands,
                                                 bool search_alias_commands,
                                                 bool search_user_mw_commands) {
-  CommandObject::CommandMap::const_iterator pos;
-
   if (search_builtin_commands)
     FindCommandsForApropos(search_word, commands_found, commands_help,
                            m_command_dict);
diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp
index 62c0fb0ff4eb8..7b5bcc2567dda 100644
--- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp
@@ -148,7 +148,6 @@ bool HexagonDYLDRendezvous::UpdateSOEntries() {
 
 bool HexagonDYLDRendezvous::UpdateSOEntriesForAddition() {
   SOEntry entry;
-  iterator pos;
 
   assert(m_previous.state == eAdd);
 
@@ -176,7 +175,6 @@ bool HexagonDYLDRendezvous::UpdateSOEntriesForAddition() {
 
 bool HexagonDYLDRendezvous::UpdateSOEntriesForDeletion() {
   SOEntryList entry_list;
-  iterator pos;
 
   assert(m_previous.state == eDelete);
 
diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
index c2084e0322c1f..2d0eef666f688 100644
--- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
+++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp
@@ -497,7 +497,6 @@ bool DYLDRendezvous::RemoveSOEntriesFromRemote(
 
 bool DYLDRendezvous::AddSOEntries() {
   SOEntry entry;
-  iterator pos;
 
   assert(m_previous.state == eAdd);
 
@@ -525,7 +524,6 @@ bool DYLDRendezvous::AddSOEntries() {
 
 bool DYLDRendezvous::RemoveSOEntries() {
   SOEntryList entry_list;
-  iterator pos;
 
   assert(m_previous.state == eDelete);
 
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
index 5876c3a9434a1..eaed4e6742824 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
@@ -156,15 +156,12 @@ GDBRemoteCommunicationServerPlatform::Handle_qLaunchGDBServer(
             __FUNCTION__);
 
   ConnectionFileDescriptor file_conn;
-  std::string hostname;
   packet.SetFilePos(::strlen("qLaunchGDBServer;"));
   llvm::StringRef name;
   llvm::StringRef value;
   std::optional<uint16_t> port;
   while (packet.GetNameColonValue(name, value)) {
-    if (name == "host")
-      hostname = std::string(value);
-    else if (name == "port") {
+    if (name == "port") {
       // Make the Optional valid so we can use its value
       port = 0;
       value.getAsInteger(0, *port);
diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
index c92f80bc166b7..96d6910ce5ce5 100644
--- a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
+++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp
@@ -98,8 +98,6 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) {
   auto listening_uris = m_listener->GetListeningConnectionURI();
   if (listening_uris.empty())
     return createStringError("failed to get listening connections");
-  std::string address =
-      llvm::join(m_listener->GetListeningConnectionURI(), ", ");
 
   ServerInfo info{listening_uris[0]};
   llvm::Expected<ServerInfoHandle> server_info_handle = ServerInfo::Write(info);
diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp
index f23261d50ed4b..01ce5870a6edb 100644
--- a/lldb/source/Target/StackFrameList.cpp
+++ b/lldb/source/Target/StackFrameList.cpp
@@ -1021,7 +1021,6 @@ size_t StackFrameList::GetStatus(Stream &strm, uint32_t first_frame,
 
   StackFrameSP selected_frame_sp =
       m_thread.GetSelectedFrame(DoNoSelectMostRelevantFrame);
-  std::string buffer;
   std::string marker;
   for (frame_idx = first_frame; frame_idx < last_frame; ++frame_idx) {
     frame_sp = GetFrameAtIndex(frame_idx);
diff --git a/lldb/tools/yaml2macho-core/yaml2macho.cpp b/lldb/tools/yaml2macho-core/yaml2macho.cpp
index c29ae282d8571..cd51d228ff062 100644
--- a/lldb/tools/yaml2macho-core/yaml2macho.cpp
+++ b/lldb/tools/yaml2macho-core/yaml2macho.cpp
@@ -176,7 +176,6 @@ int main(int argc, char **argv) {
   if (spec.binaries.size() > 0)
     for (const Binary &binary : spec.binaries) {
       std::vector<uint8_t> segment_command_bytes;
-      std::vector<uint8_t> payload_bytes;
       create_lc_note_binary_load_cmd(spec, segment_command_bytes, binary,
                                      lc_note_payload_bytes, payload_fileoff);
       payload_fileoff =
@@ -185,7 +184,6 @@ int main(int argc, char **argv) {
     }
   if (spec.addressable_bits) {
     std::vector<uint8_t> segment_command_bytes;
-    std::vector<uint8_t> payload_bytes;
     create_lc_note_addressable_bits(spec, segment_command_bytes,
                                     *spec.addressable_bits,
                                     lc_note_payload_bytes, payload_fileoff);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
index e5d28345bb84e..0aa040bc00138 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
@@ -188,7 +188,6 @@ class LLVM_ABI SimpleRemoteEPCServer : public SimpleRemoteEPCTransportClient {
 
   uint64_t NextSeqNo = 0;
   PendingJITDispatchResultsMap PendingJITDispatchResults;
-  std::vector<sys::DynamicLibrary> Dylibs;
 };
 
 } // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
index b73da194cd187..643562616dd22 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
@@ -59,9 +59,9 @@ class GenericNamedTask : public RTTIExtends<GenericNamedTask, Task> {
 /// Generic task implementation.
 template <typename FnT> class GenericNamedTaskImpl : public GenericNamedTask {
 public:
-  GenericNamedTaskImpl(FnT &&Fn, std::string DescBuffer)
-      : Fn(std::forward<FnT>(Fn)), Desc(DescBuffer.c_str()),
-        DescBuffer(std::move(DescBuffer)) {}
+  GenericNamedTaskImpl(FnT &&Fn, std::string InDescBuffer)
+      : Fn(std::forward<FnT>(Fn)), DescBuffer(std::move(InDescBuffer)),
+        Desc(DescBuffer.c_str()) {}
   GenericNamedTaskImpl(FnT &&Fn, const char *Desc)
       : Fn(std::forward<FnT>(Fn)), Desc(Desc) {
     assert(Desc && "Description cannot be null");
@@ -71,8 +71,8 @@ template <typename FnT> class GenericNamedTaskImpl : public GenericNamedTask {
 
 private:
   FnT Fn;
-  const char *Desc;
   std::string DescBuffer;
+  const char *Desc;
 };
 
 /// Create a generic named task from a std::string description.
diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
index 2c0eeac0089ae..606f5a520fd7d 100644
--- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
+++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp
@@ -36,7 +36,7 @@ void DWARFCFIState::update(const MCCFIInstruction &Directive) {
   // updated row and following the previous rows. These middle rows are stored
   // in `PrecedingRows`. For now, there is no need to store these rows in the
   // state, so they are ignored in the end.
-  dwarf::UnwindTable::RowContainer PrecedingRows;
+  // dwarf::UnwindTable::RowContainer PrecedingRows;
 
   // TODO: `.cfi_remember_state` and `.cfi_restore_state` directives are not
   // supported yet. The reason is that `parseRows` expects the stack of states
diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp
index 4165274a6f8bd..d1c0022f71dcc 100644
--- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp
+++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp
@@ -88,7 +88,6 @@ static std::string DependencyInfoPath;
 static bool VersionOption;
 static bool NoWarningForNoSymbols;
 static bool WarningsAsErrors;
-static std::string IgnoredSyslibRoot;
 
 static const std::array<std::string, 3> StandardSearchDirs{
     "/lib",
@@ -659,9 +658,6 @@ static void parseRawArgs(int Argc, char **Argv) {
   if (const opt::Arg *A = Args.getLastArg(OPT_dependencyInfoPath))
     DependencyInfoPath = A->getValue();
 
-  if (const opt::Arg *A = Args.getLastArg(OPT_ignoredSyslibRoot))
-    IgnoredSyslibRoot = A->getValue();
-
   LibraryOperation =
       Args.hasArg(OPT_static) ? Operation::Static : Operation::None;
   DeterministicOption = Args.hasArg(OPT_deterministicOption);
diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.cpp b/llvm/tools/llvm-rc/ResourceScriptParser.cpp
index e4efc83c933b4..f28efb6de611e 100644
--- a/llvm/tools/llvm-rc/ResourceScriptParser.cpp
+++ b/llvm/tools/llvm-rc/ResourceScriptParser.cpp
@@ -33,8 +33,7 @@ namespace llvm {
 namespace rc {
 
 RCParser::ParserError::ParserError(const Twine &Expected, const LocIter CurLoc,
-                                   const LocIter End)
-    : ErrorLoc(CurLoc), FileEnd(End) {
+                                   const LocIter End) {
   CurMessage = "Error parsing file: expected " + Expected.str() + ", got " +
                (CurLoc == End ? "<EOF>" : CurLoc->value()).str();
 }
diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.h b/llvm/tools/llvm-rc/ResourceScriptParser.h
index 1e7618c84142e..ed7a09b16834e 100644
--- a/llvm/tools/llvm-rc/ResourceScriptParser.h
+++ b/llvm/tools/llvm-rc/ResourceScriptParser.h
@@ -48,7 +48,6 @@ class RCParser {
 
   private:
     std::string CurMessage;
-    LocIter ErrorLoc, FileEnd;
   };
 
   explicit RCParser(std::vector<RCToken> TokenList);
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index 483e6d7d58d68..d6fdac2583d5d 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -987,7 +987,6 @@ computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) {
 
 static std::set<FileFn>
 computeCoveredFunctions(const SymbolizedCoverage &Coverage) {
-  auto AllFns = computeFunctions(Coverage.Points);
   std::set<FileFn> Result;
 
   for (const auto &Point : Coverage.Points) {
diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp
index 2f1e01d16e3c5..00c062ae1ca29 100644
--- a/llvm/unittests/ADT/STLExtrasTest.cpp
+++ b/llvm/unittests/ADT/STLExtrasTest.cpp
@@ -1170,7 +1170,7 @@ TEST(STLExtrasTest, getSingleElement) {
 }
 
 TEST(STLExtrasTest, hasNItems) {
-  const std::list<int> V0 = {}, V1 = {1}, V2 = {1, 2};
+  const std::list<int> V0 = {}, V1 = {1};
   const std::list<int> V3 = {1, 3, 5};
 
   EXPECT_TRUE(hasNItems(V0, 0));
@@ -1191,7 +1191,7 @@ TEST(STLExtrasTest, hasNItems) {
 }
 
 TEST(STLExtras, hasNItemsOrMore) {
-  const std::list<int> V0 = {}, V1 = {1}, V2 = {1, 2};
+  const std::list<int> V1 = {1}, V2 = {1, 2};
   const std::list<int> V3 = {1, 3, 5};
 
   EXPECT_TRUE(hasNItemsOrMore(V1, 1));
diff --git a/llvm/unittests/CodeGen/PassManagerTest.cpp b/llvm/unittests/CodeGen/PassManagerTest.cpp
index dc83effd3c1e1..f004d14b4aee4 100644
--- a/llvm/unittests/CodeGen/PassManagerTest.cpp
+++ b/llvm/unittests/CodeGen/PassManagerTest.cpp
@@ -218,7 +218,7 @@ TEST_F(PassManagerTest, Basic) {
 
   testing::internal::CaptureStderr();
   MPM.run(*M, MAM);
-  std::string Output = testing::internal::GetCapturedStderr();
+  testing::internal::GetCapturedStderr();
 
   EXPECT_EQ((std::vector<int>{10, 16, 18, 20, 30, 36, 38, 40}), Counts);
   EXPECT_EQ(40, Count);
diff --git a/llvm/unittests/TextAPI/TextStubV3Tests.cpp b/llvm/unittests/TextAPI/TextStubV3Tests.cpp
index 3c822b2188a41..19624c9dc2abb 100644
--- a/llvm/unittests/TextAPI/TextStubV3Tests.cpp
+++ b/llvm/unittests/TextAPI/TextStubV3Tests.cpp
@@ -81,10 +81,6 @@ TEST(TBDv3, ReadFile) {
   for (auto &&arch : Archs)
     Targets.emplace_back(Target(arch, Platform));
   EXPECT_EQ(Archs, File->getArchitectures());
-  TargetToAttr Uuids = {{Target(AK_armv7, PLATFORM_UNKNOWN),
-                         "00000000-0000-0000-0000-000000000000"},
-                        {Target(AK_arm64, PLATFORM_UNKNOWN),
-                         "11111111-1111-1111-1111-111111111111"}};
   EXPECT_EQ(File->getPlatforms().size(), 1U);
   EXPECT_EQ(Platform, *File->getPlatforms().begin());
   EXPECT_EQ(std::string("Test.dylib"), File->getInstallName());
@@ -165,10 +161,6 @@ TEST(TBDv3, ReadMultipleDocuments) {
   for (auto &&arch : Archs)
     Targets.emplace_back(Target(arch, Platform));
   EXPECT_EQ(Archs, File->getArchitectures());
-  TargetToAttr Uuids = {{Target(AK_armv7, PLATFORM_UNKNOWN),
-                         "00000000-0000-0000-0000-000000000000"},
-                        {Target(AK_arm64, PLATFORM_UNKNOWN),
-                         "11111111-1111-1111-1111-111111111111"}};
   EXPECT_EQ(File->getPlatforms().size(), 1U);
   EXPECT_EQ(Platform, *File->getPlatforms().begin());
   EXPECT_EQ(std::string("Test.dylib"), File->getInstallName());
@@ -358,7 +350,7 @@ TEST(TBDv3, WriteMultipleDocuments) {
   Document.addSymbol(EncodeKind::GlobalSymbol, "_sym3", Targets);
   Document.addSymbol(EncodeKind::GlobalSymbol, "_sym4", Targets);
   File.addDocument(std::make_shared<InterfaceFile>(std::move(Document)));
-  
+
   SmallString<4096> Buffer;
   raw_svector_ostream OS(Buffer);
   Error Result = TextAPIWriter::writeToStream(OS, File);
diff --git a/llvm/unittests/TextAPI/TextStubV4Tests.cpp b/llvm/unittests/TextAPI/TextStubV4Tests.cpp
index 3f9d8d9b7deaa..11c485198b704 100644
--- a/llvm/unittests/TextAPI/TextStubV4Tests.cpp
+++ b/llvm/unittests/TextAPI/TextStubV4Tests.cpp
@@ -394,8 +394,6 @@ TEST(TBDv4, WriteMultipleDocuments) {
       Target(AK_i386, Platform),
       Target(AK_x86_64, Platform),
   };
-  TargetToAttr Uuids = {{Targets[0], "00000000-0000-0000-0000-000000000002"},
-                        {Targets[1], "11111111-1111-1111-1111-111111111112"}};
   File.setInstallName("/System/Library/Frameworks/Umbrella.framework/Umbrella");
   File.setFileType(FileType::TBD_V4);
   File.addTargets(Targets);
@@ -409,8 +407,6 @@ TEST(TBDv4, WriteMultipleDocuments) {
                             Targets[1]);
 
   // Write Second Document
-  Uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"},
-           {Targets[1], "11111111-1111-1111-1111-111111111111"}};
   InterfaceFile Document;
   Document.setInstallName("/System/Library/Frameworks/A.framework/A");
   Document.setFileType(FileType::TBD_V4);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 77cc7a388a984..0f954e384929a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6546,7 +6546,6 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
     // to use or standardizing/canonicalizing the order of the bounds to compute
     // the offset may be useful in the future when there's other frontends with
     // different formats.
-    std::vector<llvm::Value *> dimensionIndexSizeOffset;
     for (int i = bounds.size() - 1; i >= 0; --i) {
       if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
               bounds[i].getDefiningOp())) {
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 90ada40302296..ee73c9841e240 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -3429,7 +3429,6 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder(
     });
   };
   if (op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) {
-    std::string sizes = op.getGetterName(operandSegmentAttrName);
     body << "  ::llvm::copy(::llvm::ArrayRef<int32_t>({";
     emitSegment();
     body << "}), " << builderOpStateProperties

>From 86e2f0f502066a2624981d6272b0dd727edc78b7 Mon Sep 17 00:00:00 2001
From: Akimasa Watanuki <mencotton0410 at gmail.com>
Date: Wed, 24 Jun 2026 19:02:31 +0900
Subject: [PATCH 09/42] [CIR] Handle const evaluated variable values (#205512)

Match the `VarDecl::evaluateValue()` contract updated by #205033 in CIR
constant emission.
---
 clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 6c64d7571795a..c29b66ac2f8bc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -1725,7 +1725,7 @@ mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &d) {
 
   // Try to emit the initializer.  Note that this can allow some things that
   // are not allowed by tryEmitPrivateForMemory alone.
-  if (APValue *value = d.evaluateValue())
+  if (const APValue *value = d.evaluateValue())
     return tryEmitPrivateForMemory(*value, destType);
 
   return {};

>From 20988f874dc241b8b7c21f5c0b1f4c032835c2d2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Jun 2026 11:08:01 +0100
Subject: [PATCH 10/42] [X86] madd.ll - add additional tests for matchPMADDWD
 folds that fail with irregular source types (#205514)

Ensure #205391 doesn't crash with non-pow2/illegal types
---
 llvm/test/CodeGen/X86/madd.ll | 235 ++++++++++++++++++++++++++++++++++
 1 file changed, 235 insertions(+)

diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 056712cc5a66c..63b390f4b9bdf 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -3739,3 +3739,238 @@ define <16 x i32> @extract_concat_pmaddwd(<32 x i16> %a, <32 x i16> %b) {
   %ret = add <16 x i32> %odd, %even
   ret <16 x i32> %ret
 }
+
+define <5 x i32> @oddvector_mul(<16 x i16> %A, <16 x i16> %B) {
+; SSE2-LABEL: oddvector_mul:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movq %rdi, %rax
+; SSE2-NEXT:    movdqa %xmm1, %xmm4
+; SSE2-NEXT:    pmulhw %xmm3, %xmm4
+; SSE2-NEXT:    pmullw %xmm3, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    pmulhw %xmm2, %xmm3
+; SSE2-NEXT:    pmullw %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
+; SSE2-NEXT:    paddd %xmm3, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
+; SSE2-NEXT:    movd %xmm2, %ecx
+; SSE2-NEXT:    movdqa %xmm0, (%rdi)
+; SSE2-NEXT:    movd %xmm1, %edx
+; SSE2-NEXT:    addl %ecx, %edx
+; SSE2-NEXT:    movl %edx, 16(%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: oddvector_mul:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movq %rdi, %rax
+; SSE42-NEXT:    pxor %xmm4, %xmm4
+; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; SSE42-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE42-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE42-NEXT:    pmaddwd %xmm2, %xmm0
+; SSE42-NEXT:    pmaddwd %xmm5, %xmm4
+; SSE42-NEXT:    phaddd %xmm0, %xmm4
+; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE42-NEXT:    pmaddwd %xmm0, %xmm1
+; SSE42-NEXT:    movd %xmm1, %ecx
+; SSE42-NEXT:    movdqa %xmm4, (%rdi)
+; SSE42-NEXT:    pextrd $1, %xmm1, %edx
+; SSE42-NEXT:    addl %ecx, %edx
+; SSE42-NEXT:    movl %edx, 16(%rdi)
+; SSE42-NEXT:    retq
+;
+; AVX1-LABEL: oddvector_mul:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm5 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; AVX1-NEXT:    vpmaddwd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm4, %xmm1
+; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
+; AVX1-NEXT:    vpmaddwd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vphaddd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: oddvector_mul:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm2
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxwd %xmm1, %ymm3
+; AVX2-NEXT:    vpmulld %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; AVX2-NEXT:    vpmovsxwd %xmm1, %ymm1
+; AVX2-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vphaddd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: oddvector_mul:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512-NEXT:    vpmovsxwd %ymm1, %zmm1
+; AVX512-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX512-NEXT:    retq
+   %a = sext <16 x i16> %A to <16 x i32>
+   %b = sext <16 x i16> %B to <16 x i32>
+   %m = mul nsw <16 x i32> %a, %b
+   %odd = shufflevector <16 x i32> %m, <16 x i32> undef, <5 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8>
+   %even = shufflevector <16 x i32> %m, <16 x i32> undef, <5 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9>
+   %ret = add <5 x i32> %odd, %even
+   ret <5 x i32> %ret
+}
+
+define <4 x i32> @oddvector_sext(<13 x i16> %A) {
+; SSE2-LABEL: oddvector_sext:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pinsrw $1, %r8d, %xmm1
+; SSE2-NEXT:    pinsrw $3, %r9d, %xmm1
+; SSE2-NEXT:    pinsrw $5, {{[0-9]+}}(%rsp), %xmm1
+; SSE2-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm1
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    pinsrw $1, %edi, %xmm0
+; SSE2-NEXT:    pinsrw $3, %esi, %xmm0
+; SSE2-NEXT:    pinsrw $5, %edx, %xmm0
+; SSE2-NEXT:    pinsrw $7, %ecx, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE2-NEXT:    paddd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: oddvector_sext:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movd %edi, %xmm0
+; SSE42-NEXT:    pinsrw $1, %esi, %xmm0
+; SSE42-NEXT:    pinsrw $2, %edx, %xmm0
+; SSE42-NEXT:    pinsrw $3, %ecx, %xmm0
+; SSE42-NEXT:    movd %r8d, %xmm1
+; SSE42-NEXT:    pinsrw $1, %r9d, %xmm1
+; SSE42-NEXT:    pinsrw $2, {{[0-9]+}}(%rsp), %xmm1
+; SSE42-NEXT:    pinsrw $3, {{[0-9]+}}(%rsp), %xmm1
+; SSE42-NEXT:    pmovsxwd %xmm1, %xmm1
+; SSE42-NEXT:    pmovsxwd %xmm0, %xmm0
+; SSE42-NEXT:    phaddd %xmm1, %xmm0
+; SSE42-NEXT:    retq
+;
+; AVX1-LABEL: oddvector_sext:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1]
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: oddvector_sext:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: oddvector_sext:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+   %a = sext <13 x i16> %A to <13 x i32>
+   %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+   %even = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+   %ret = add <4 x i32> %odd, %even
+   ret <4 x i32> %ret
+}
+
+define <3 x i32> @oddvector_shl(<12 x i16> %A) {
+; SSE2-LABEL: oddvector_shl:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    pinsrw $1, %edi, %xmm0
+; SSE2-NEXT:    pinsrw $3, %esi, %xmm0
+; SSE2-NEXT:    pinsrw $5, %edx, %xmm0
+; SSE2-NEXT:    pinsrw $7, %ecx, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    pinsrw $1, %r8d, %xmm1
+; SSE2-NEXT:    pinsrw $3, %r9d, %xmm1
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    pslld $7, %xmm1
+; SSE2-NEXT:    pslld $7, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE2-NEXT:    paddd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: oddvector_shl:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movd %edi, %xmm0
+; SSE42-NEXT:    pinsrw $1, %esi, %xmm0
+; SSE42-NEXT:    pinsrw $2, %edx, %xmm0
+; SSE42-NEXT:    pinsrw $3, %ecx, %xmm0
+; SSE42-NEXT:    pmovsxwd %xmm0, %xmm0
+; SSE42-NEXT:    movd %r8d, %xmm1
+; SSE42-NEXT:    pinsrw $1, %r9d, %xmm1
+; SSE42-NEXT:    pmovsxwd %xmm1, %xmm1
+; SSE42-NEXT:    pslld $7, %xmm1
+; SSE42-NEXT:    pslld $7, %xmm0
+; SSE42-NEXT:    phaddd %xmm1, %xmm0
+; SSE42-NEXT:    retq
+;
+; AVX1-LABEL: oddvector_shl:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1-NEXT:    vpmovsxwd %xmm1, %xmm1
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT:    vpslld $7, %xmm0, %xmm0
+; AVX1-NEXT:    vpslld $7, %xmm1, %xmm1
+; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: oddvector_shl:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpslld $7, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: oddvector_shl:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,3,5,0]
+; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX512-NEXT:    vpslld $7, %ymm0, %ymm0
+; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm1
+; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
+; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+   %a = sext <12 x i16> %A to <12 x i32>
+   %shl = shl <12 x i32> %a, splat (i32 7)
+   %odd = shufflevector <12 x i32> %shl, <12 x i32> undef, <3 x i32> <i32 0, i32 2, i32 4>
+   %even = shufflevector <12 x i32> %shl, <12 x i32> undef, <3 x i32> <i32 1, i32 3, i32 5>
+   %ret = add <3 x i32> %odd, %even
+   ret <3 x i32> %ret
+}

>From 08e389cc4a029fb6d659f3f1e277ab3bce461a38 Mon Sep 17 00:00:00 2001
From: Anonmiraj <ezzibrahimx at gmail.com>
Date: Wed, 24 Jun 2026 13:18:58 +0300
Subject: [PATCH 11/42] [APINotes] Skip per-decl ProcessAPINotes work when no
 API notes are active (#203710)

Cache "any API notes active" and skip per-decl lookups.

| function | before #202727 | trunk (parent) | this PR |
| ------------------------ | ----------------- | ----------------- | ------------------- |
| `Sema::ProcessAPINotes` | 2,385,852 (0.21%) | 1,028,280 (0.09%) | **365,310 (0.03%)** |
| `UnwindNamespaceContext` | 349,244 | gone | gone |
| `findAPINotes` | 448,140 | 473,550 | **gone** |

cc @egorzhdan @Xazax-hun @compnerd
---
 clang/include/clang/APINotes/APINotesManager.h | 7 +++++++
 clang/lib/APINotes/APINotesManager.cpp         | 5 +++++
 clang/lib/Sema/SemaAPINotes.cpp                | 2 ++
 3 files changed, 14 insertions(+)

diff --git a/clang/include/clang/APINotes/APINotesManager.h b/clang/include/clang/APINotes/APINotesManager.h
index 772fa5faa0f87..aaf48706fb26b 100644
--- a/clang/include/clang/APINotes/APINotesManager.h
+++ b/clang/include/clang/APINotes/APINotesManager.h
@@ -50,6 +50,11 @@ class APINotesManager {
   /// source file from which an entity was declared.
   bool ImplicitAPINotes;
 
+  /// Cached value of hasAPINotes(): true once any current-module reader has
+  /// been loaded, or if implicit API notes lookup is enabled. Monotonic within
+  /// a compilation, so it can be tested per-declaration without recomputing.
+  bool HasAPINotes;
+
   /// Whether to apply all APINotes as optionally-applied versioned
   /// entities. This means that when building a Clang module,
   /// we capture every note on a given decl wrapped in a SwiftVersionedAttr
@@ -172,6 +177,8 @@ class APINotesManager {
     return ArrayRef(CurrentModuleReaders).slice(0, HasPrivate ? 2 : 1);
   }
 
+  bool hasAPINotes() const { return HasAPINotes; }
+
   /// Find the API notes readers that correspond to the given source location.
   llvm::SmallVector<APINotesReader *, 2> findAPINotes(SourceLocation Loc);
 
diff --git a/clang/lib/APINotes/APINotesManager.cpp b/clang/lib/APINotes/APINotesManager.cpp
index acb84c3949cb1..2cc801d5415b8 100644
--- a/clang/lib/APINotes/APINotesManager.cpp
+++ b/clang/lib/APINotes/APINotesManager.cpp
@@ -50,6 +50,7 @@ class PrettyStackTraceDoubleString : public llvm::PrettyStackTraceEntry {
 
 APINotesManager::APINotesManager(SourceManager &SM, const LangOptions &LangOpts)
     : SM(SM), ImplicitAPINotes(LangOpts.APINotes),
+      HasAPINotes(LangOpts.APINotes),
       VersionIndependentSwift(LangOpts.SwiftVersionIndependentAPINotes) {}
 
 APINotesManager::~APINotesManager() {
@@ -319,6 +320,8 @@ bool APINotesManager::loadCurrentModuleAPINotes(
       M->APINotesFile = File.getName().str();
   }
 
+  if (NumReaders > 0)
+    HasAPINotes = true;
   return NumReaders > 0;
 }
 
@@ -331,6 +334,8 @@ bool APINotesManager::loadCurrentModuleAPINotesFromBuffer(
 
     CurrentModuleReaders[NumReader++] = Reader.release();
   }
+  if (NumReader > 0)
+    HasAPINotes = true;
   return NumReader;
 }
 
diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp
index a99408a4c8a7b..67c08d239e758 100644
--- a/clang/lib/Sema/SemaAPINotes.cpp
+++ b/clang/lib/Sema/SemaAPINotes.cpp
@@ -998,6 +998,8 @@ UnwindTagContext(TagDecl *DC, api_notes::APINotesManager &APINotes) {
 void Sema::ProcessAPINotes(Decl *D) {
   if (!D)
     return;
+  if (!APINotes.hasAPINotes())
+    return;
   auto Readers = APINotes.findAPINotes(D->getLocation());
   if (Readers.empty())
     return;

>From d8b6b295ac3064c2ef0a2a44e6d25663802ce866 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Wed, 24 Jun 2026 11:30:13 +0100
Subject: [PATCH 12/42] [AArch64][TableGen] Define ZA, ZT0 and FPMR memory
 defvars (#154144)

Introduce TableGen defvars for the inaccessible memory effects used to
model accesses to ZA, ZT0 and FPMR in IntrinsicsAArch64.td.

This is a preparatory cleanup for a follow-up patch that will replace
these uses of InaccessibleMem with target-specific memory locations.
Other uses of inaccessible memory in the file are left unchanged because
they are unrelated to ZA, ZT0 or FPMR.

This preserves the existing memory effects. In particular, intrinsics
that currently access both argument memory and inaccessible memory keep
the same ArgMem/InaccessibleMem read/write modelling.

---------

Co-authored-by: Paul Walker <paul.walker at arm.com>
---
 llvm/include/llvm/IR/Intrinsics.td        |   5 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 218 +++++++++++++---------
 2 files changed, 134 insertions(+), 89 deletions(-)

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index d2517fbc4c8b2..c4eedb62d46fc 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -74,6 +74,11 @@ class IntrWrite<list<IntrinsicMemoryLocation> idx> : IntrinsicProperty {
    list<IntrinsicMemoryLocation> MemLoc=idx;
 }
 
+// Constrain intrinsic to not write any memory location.
+defvar IntrReadOnly = IntrWrite<[]>;
+// Constrain intrinsic to not read any memory location.
+defvar IntrWriteOnly = IntrRead<[]>;
+
 // Commutative - This intrinsic is commutative: X op Y == Y op X.
 def Commutative : IntrinsicProperty;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 2923595486712..5ba1f4ba861d2 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -732,6 +732,11 @@ def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
 def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
 
+// Aliases for the memory location that models the FPMR, ZT0 and ZA registers.
+defvar FPMR = InaccessibleMem;
+defvar ZT0 = InaccessibleMem;
+defvar ZA = InaccessibleMem;
+
 let TargetPrefix = "aarch64" in {
   class FPENV_Get_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
@@ -740,7 +745,7 @@ let TargetPrefix = "aarch64" in {
   class RNDR_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
   class FPMR_Set_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWrite<[FPMR]>, IntrWriteOnly]>;
 }
 
 // FP environment registers.
@@ -965,7 +970,8 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
 
   // Conversions
   class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_neon_fp8_cvtl1   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
   def int_aarch64_neon_fp8_cvtl2   : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
@@ -974,13 +980,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   def int_aarch64_neon_fp8_fcvtn2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   // Dot-product
   class AdvSIMD_FP8_DOT_Intrinsic
@@ -988,14 +994,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              LLVMMatchType<1>],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   class AdvSIMD_FP8_DOT_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_anyvector_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                             [IntrRead<[FPMR]>, IntrReadOnly, ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic;
   def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic;
@@ -1010,7 +1016,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                             [LLVMMatchType<0>,
                              llvm_v16i8_ty,
                              llvm_v16i8_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class AdvSIMD_FP8_FMLA_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1018,7 +1024,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat
                              llvm_v16i8_ty,
                              llvm_v16i8_ty,
                              llvm_i32_ty],
-                             [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                             [IntrRead<[FPMR]>, IntrReadOnly, ImmArg<ArgIndex<3>>]>;
 
   def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic;
   def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic;
@@ -2860,7 +2866,7 @@ def int_aarch64_sve_fmmla
 def int_aarch64_sve_fp8_fmmla
   : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                           [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                          [IntrReadMem, IntrInaccessibleMemOnly]>;
+                          [IntrRead<[FPMR]>, IntrReadOnly]>;
 
 //
 // SVE ACLE: 7.2. BFloat16 extensions
@@ -2963,7 +2969,9 @@ def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic<[IntrSpeculatable
 let TargetPrefix = "aarch64" in {
   class SME_Load_Store_Intrinsic<LLVMType pred_ty>
     : DefaultAttrsIntrinsic<[],
-        [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+        [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty],
+        [IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>,
+         ImmArg<ArgIndex<2>>]>;
 
   // Loads
   def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
@@ -2991,18 +2999,21 @@ let TargetPrefix = "aarch64" in {
 
   // Spill + fill
   class SME_LDR_STR_ZA_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>]>;
   def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic;
   def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic;
 
+
   class SME_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
+           llvm_i32_ty, llvm_i32_ty], [IntrRead<[ZA]>, IntrReadOnly,
+                                        ImmArg<ArgIndex<2>>]>;
   class SME_VectorToTile_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_read_horiz  : SME_TileToVector_Intrinsic;
   def int_aarch64_sme_read_vert   : SME_TileToVector_Intrinsic;
@@ -3017,13 +3028,13 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_X2_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   class SME_MOVAZ_TileToVector_X4_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
            LLVMMatchType<0>,LLVMMatchType<0>],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic;
   def int_aarch64_sme_readz_vert_x2  : SME_MOVAZ_TileToVector_X2_Intrinsic;
@@ -3034,7 +3045,7 @@ let TargetPrefix = "aarch64" in {
   class SME_MOVAZ_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
           [llvm_i32_ty, llvm_i32_ty],
-          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic;
   def int_aarch64_sme_readz_vert  : SME_MOVAZ_TileToVector_Intrinsic;
@@ -3045,23 +3056,24 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_readz_x2
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrInaccessibleMemOnly]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>]>;
 
   def int_aarch64_sme_readz_x4
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
           [llvm_i32_ty],
-          [IntrInaccessibleMemOnly]>;
+          [IntrWrite<[ZA]>, IntrRead<[ZA]>]>;
 
   def int_aarch64_sme_write_lane_zt
        :  DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
-            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly]>;
+            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>,
+             IntrWrite<[ZT0]>]>;
 
   def int_aarch64_sme_write_zt
        :  DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
-            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>;
+            [ImmArg<ArgIndex<0>>, IntrWrite<[ZT0]>, IntrWriteOnly]>;
 
 
-  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
   def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">;
 
   class SME_OuterProduct_Intrinsic
@@ -3070,7 +3082,8 @@ let TargetPrefix = "aarch64" in {
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMMatchType<0>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
   def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
@@ -3088,17 +3101,32 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
 
   class SME_OuterProduct_QuarterTile_Single_Single
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+           llvm_anyvector_ty,
+           LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                               IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+ class SME_FP8_OuterProduct_QuarterTile_Single_Single
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   class SME_OuterProduct_QuarterTile_Single_Multi
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty,
           LLVMMatchType<0>,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                              IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+  class SME_FP8_OuterProduct_QuarterTile_Single_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   class SME_OuterProduct_QuarterTile_Multi_Multi
       : DefaultAttrsIntrinsic<[],
@@ -3106,7 +3134,16 @@ let TargetPrefix = "aarch64" in {
           llvm_anyvector_ty,
           LLVMMatchType<0>,
           LLVMMatchType<0>,
-          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>;
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[ZA]>,
+                              IntrWrite<[ZA]>, IntrHasSideEffects]>;
+
+  class SME_FP8_OuterProduct_QuarterTile_Multi_Multi
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i32_ty,
+          llvm_anyvector_ty,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>,
+          LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>;
 
   // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S
   foreach mode = ["s", "a"] in {
@@ -3138,8 +3175,8 @@ let TargetPrefix = "aarch64" in {
           LLVMMatchType<0>,
           llvm_nxv16i8_ty,
           llvm_i32_ty],
-         [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
-          IntrInaccessibleMemOnly]>;
+         [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>, IntrRead<[ZA]>,
+          IntrWrite<[ZA]>]>;
 
   def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic;
   def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic;
@@ -3157,17 +3194,17 @@ let TargetPrefix = "aarch64" in {
           llvm_nxv16i8_ty,
           llvm_i32_ty],
          [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<5>>,
-          IntrInaccessibleMemOnly]>;
+          IntrRead<[ZA, FPMR]>, IntrWrite<[ZA]>]>;
 
   def int_aarch64_sme_fp8_ftmopa_za16 : SME_FP8_OuterProduct_TMOP_Intrinsic;
   def int_aarch64_sme_fp8_ftmopa_za32 : SME_FP8_OuterProduct_TMOP_Intrinsic;
 
   // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product
   foreach za = ["za16", "za32"] in {
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi;
-    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi;
+    def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi;
   }
 
   class SME_AddVectorToTile_Intrinsic
@@ -3175,7 +3212,8 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+           llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                                ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
   def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
@@ -3199,9 +3237,9 @@ let TargetPrefix = "aarch64" in {
                               [IntrNoMem, IntrHasSideEffects]>;
 
   def int_aarch64_sme_za_enable
-      : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+      : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>;
   def int_aarch64_sme_za_disable
-      : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>;
+      : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>;
 
   // Clamp
   //
@@ -3290,56 +3328,59 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_Matrix_ArrayVector_Single_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                 llvm_anyvector_ty,
                 LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<3>>]>;
 
   class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<4>>]>;
 
   class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic
     : DefaultAttrsIntrinsic<[],
                 [llvm_i32_ty,
                  llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
                  LLVMMatchType<0>, llvm_i32_ty],
-                [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
+                [IntrRead<[ZA]>, IntrWrite<[ZA]>,
+                 ImmArg<ArgIndex<6>>]>;
 
   class SVE2_VG2_Multi_Imm_Intrinsic
     : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
@@ -3358,14 +3399,14 @@ let TargetPrefix = "aarch64" in {
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrInaccessibleMemOnly]>;
+               [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SME2_ZA_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrInaccessibleMemOnly]>;
+               [IntrRead<[ZA]>, IntrWrite<[ZA]>]>;
 
   class SVE2_VG2_Multi_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3483,50 +3524,50 @@ let TargetPrefix = "aarch64" in {
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_ZA_ArrayVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_Matrix_TileVector_Read_VG4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                              LLVMMatchType<0>,  LLVMMatchType<0>],
                 [llvm_i32_ty, llvm_i32_ty],
-                [IntrReadMem, IntrInaccessibleMemOnly]>;
+                [IntrRead<[ZA]>, IntrReadOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_ZA_ArrayVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Matrix_TileVector_Write_VG2_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
 
   class SME2_Matrix_TileVector_Write_VG4_Intrinsic
    : DefaultAttrsIntrinsic<[],
                [llvm_i32_ty, llvm_i32_ty,
                 llvm_anyvector_ty, LLVMMatchType<0>,
                 LLVMMatchType<0>,  LLVMMatchType<0>],
-               [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>;
+               [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg<ArgIndex<0>>]>;
 
   class SVE2_VG2_Multi_Single_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -3692,7 +3733,7 @@ let TargetPrefix = "aarch64" in {
   // Multi-vector zeroing
 
   foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in {
-    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrWriteMem, IntrInaccessibleMemOnly]>;
+    def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty],  [IntrWrite<[ZA]>, IntrWriteOnly]>;
   }
   // Multi-vector signed saturating doubling multiply high
   def int_aarch64_sve_sqdmulh_single_vgx2 : SVE2_VG2_Multi_Single_Intrinsic;
@@ -3830,14 +3871,14 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>,
           LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
           LLVMMatchType<0>],
-        [IntrInaccessibleMemOnly, IntrWriteMem]>;
+        [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
   def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic;
@@ -3852,7 +3893,7 @@ let TargetPrefix = "aarch64" in {
           [llvm_i32_ty,
           llvm_anyvector_ty, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic
       : DefaultAttrsIntrinsic<[],
@@ -3860,7 +3901,7 @@ let TargetPrefix = "aarch64" in {
           llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
           LLVMMatchType<0>, LLVMMatchType<0>],
-          [IntrInaccessibleMemOnly, IntrWriteMem]>;
+          [IntrWrite<[ZA]>, IntrWriteOnly]>;
 
   def int_aarch64_sme_add_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
   def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic;
@@ -3981,37 +4022,38 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_sel_x2  : SVE2_VG2_Sel_Intrinsic;
   def int_aarch64_sve_sel_x4  : SVE2_VG4_Sel_Intrinsic;
 
+
   class SME_LDR_STR_ZT_Intrinsic
-    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], [IntrInaccessibleMemOrArgMemOnly]>;
+    : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], [IntrRead<[ArgMem, ZT0]>, IntrWrite<[ArgMem, ZT0]>]>;
   def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic;
   def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic;
 
   //
   //  Zero ZT0
   //
-  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrWriteMem]>;
+  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWrite<[ZT0]>, IntrWriteOnly]>;
 
   //
   // Lookup table expand one register
   //
   def int_aarch64_sme_luti2_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti6_zt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty],
-                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, IntrRead<[ZT0]>, IntrReadOnly]>;
 
   // Lookup table expand two registers
   //
   def int_aarch64_sme_luti2_lane_zt_x2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt_x2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
 
   //
   // Lookup table expand four registers
@@ -4019,11 +4061,11 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_luti2_lane_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti4_lane_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti6_lane_x4_x2
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [LLVMMatchType<0>, LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
@@ -4036,12 +4078,12 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_luti4_zt_x4
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, IntrRead<[ZT0]>, IntrReadOnly]>;
   def int_aarch64_sme_luti6_zt_x4
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrReadMem]>;
+                            [ImmArg<ArgIndex<0>>, IntrRead<[ZT0]>, IntrReadOnly]>;
 
 
   //
@@ -4127,7 +4169,7 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Cvt
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_sve_fp8_cvt1   : SVE2_FP8_Cvt;
   def int_aarch64_sve_fp8_cvt2   : SVE2_FP8_Cvt;
@@ -4138,29 +4180,26 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_Narrow_Cvt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   def int_aarch64_sve_fp8_cvtn  : SVE2_FP8_Narrow_Cvt;
   def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;
 
   def int_aarch64_sve_fp8_cvtnt
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   // Dot product
   class SVE2_FP8_FMLA_FDOT
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
   class SVE2_FP8_FMLA_FDOT_Lane
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMMatchType<0>,
                              llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
-
+                            [IntrRead<[FPMR]>, IntrReadOnly, ImmArg<ArgIndex<3>>]>;
   def int_aarch64_sve_fp8_fdot      : SVE2_FP8_FMLA_FDOT;
   def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane;
 
@@ -4186,69 +4225,70 @@ let TargetPrefix = "aarch64" in {
   class SVE2_FP8_CVT_X2_Single_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                             [llvm_nxv16i8_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class SVE2_FP8_CVT_Single_X4_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   class SME_FP8_OuterProduct_Intrinsic
       : DefaultAttrsIntrinsic<[],
           [llvm_i32_ty,
           llvm_nxv16i1_ty, llvm_nxv16i1_ty,
           llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-          [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly]>;
+          [ImmArg<ArgIndex<0>>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_LANE_VGx1_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                llvm_nxv16i8_ty,
                                llvm_nxv16i8_ty,
                                llvm_i32_ty],
-                          [IntrInaccessibleMemOnly, ImmArg<ArgIndex<3>>]>;
+                          [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<3>>]>;
 
   class SME_FP8_ZA_LANE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<4>>]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<4>>]>;
 
   class SME_FP8_ZA_LANE_VGx4_Intrinsic
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_i32_ty],
-                            [IntrInaccessibleMemOnly, ImmArg<ArgIndex<6>>]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg<ArgIndex<6>>]>;
+
   class SME_FP8_ZA_VGx1_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_SINGLE_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_SINGLE_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                 llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                 llvm_nxv16i8_ty],
-                              [IntrInaccessibleMemOnly]>;
+                              [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_MULTI_VGx2_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
 
   class SME_FP8_ZA_MULTI_VGx4_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i32_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty,
                                  llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
-                            [IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>;
   //
   // CVT from FP8 to half-precision/BFloat16 multi-vector
   //
@@ -4267,7 +4307,7 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sve_fp8_cvt_x2
     : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
                             [llvm_anyvector_ty, LLVMMatchType<0>],
-                            [IntrReadMem, IntrInaccessibleMemOnly]>;
+                            [IntrRead<[FPMR]>, IntrReadOnly]>;
 
   def int_aarch64_sve_fp8_cvt_x4  : SVE2_FP8_CVT_Single_X4_Intrinsic;
   def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic;

>From 910425f1a4ea52f5b6a04e7f55a5df812cd77424 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 24 Jun 2026 11:36:29 +0100
Subject: [PATCH 13/42] [NFC][Clang][Tests] Remove +bf16 from AArch64 tests
 that don't strictly need the feature flag. (#205336)

---
 .../fp8-intrinsics/acle_neon_fp8_reinterpret.c |  6 +++---
 clang/test/CodeGen/AArch64/neon/bf16-getset.c  |  6 +++---
 .../sme2p3-intrinsics/acle_sme2p3_luti6.c      | 10 +++++-----
 .../sve2p3-intrinsics/acle_sve2p3_luti6.c      | 10 +++++-----
 .../acle_sve2p3_luti6_lane_x2.c                | 18 +++++++++---------
 .../CodeGen/attr-arm-sve-vector-bits-bitcast.c |  6 +++---
 .../CodeGen/attr-arm-sve-vector-bits-codegen.c |  2 +-
 .../CodeGen/attr-arm-sve-vector-bits-globals.c |  4 ++--
 .../CodeGen/attr-arm-sve-vector-bits-types.c   | 12 ++++++------
 clang/test/CodeGen/svboolx2_t.cpp              |  2 +-
 clang/test/CodeGen/svboolx4_t.cpp              |  2 +-
 .../aarch64-mangle-sve-fixed-vectors.cpp       | 10 +++++-----
 .../CodeGenCXX/aarch64-mangle-sve-vectors.cpp  |  4 ++--
 .../CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp   | 10 +++++-----
 clang/test/CodeGenCXX/mangle-neon-vectors.cpp  | 12 +-----------
 clang/test/Index/index-builtin-sve.cpp         |  4 ++--
 clang/test/Sema/aarch64-bf16-ldst-intrinsics.c |  3 +--
 .../Sema/aarch64-incompat-sm-builtin-calls.cpp |  2 +-
 clang/test/Sema/aarch64-sme-func-attrs.c       |  4 ++--
 ...ch64-sme-streaming-nonstreaming-vl-checks.c |  4 ++--
 clang/test/Sema/aarch64-sme2p1-diagnostics.c   |  2 +-
 .../acle_sme2p1_imm.cpp                        |  3 +--
 .../acle_sme2p3_imm.c                          |  2 +-
 clang/test/Sema/attr-arm-sve-vector-bits.c     | 10 +++++-----
 .../test/SemaCXX/attr-arm-sve-vector-bits.cpp  |  2 +-
 25 files changed, 69 insertions(+), 81 deletions(-)

diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
index adf5fceb9ceb9..a0b26fc1bb6e3 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c
@@ -1,9 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 #include <arm_neon.h>
-// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/AArch64/neon/bf16-getset.c b/clang/test/CodeGen/AArch64/neon/bf16-getset.c
index a00e2fcc01e81..a03a9d9cc5f40 100644
--- a/clang/test/CodeGen/AArch64/neon/bf16-getset.c
+++ b/clang/test/CodeGen/AArch64/neon/bf16-getset.c
@@ -1,8 +1,8 @@
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
-// RUN:                   %clang_cc1_cg_arm64_neon -target-feature +bf16           -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM
-// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +bf16 -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %}
-// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +bf16 -fclangir -emit-cir  %s -disable-O0-optnone |                               FileCheck %s --check-prefixes=ALL,CIR %}
+// RUN:                   %clang_cc1_cg_arm64_neon           -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %}
+// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -fclangir -emit-cir  %s -disable-O0-optnone |                               FileCheck %s --check-prefixes=ALL,CIR %}
 
 #include <arm_neon.h>
 
diff --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
index 656b0ce565833..d61b2b2254893 100644
--- a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
+++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -S -O1 -Werror -o /dev/null %s
 
 #include <arm_sme.h>
 
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
index 11f0848af1c07..216482cfcea9b 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O1 -Werror -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
index b6d8fe5cff531..e68152545d676 100644
--- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
+++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c
@@ -1,15 +1,15 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK
-// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
-// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK
-// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK
+// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O1 -Werror -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
index 63e87c7dd37d3..0d035880c3f42 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
index 06fbb0027d7c1..6e70834623e17 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
index ae011db633b6a..0d6540bc10d26 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
index c6d5d2d2cffdc..b9fc43335584c 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=16 -mvscale-max=16 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048
-// RUN: %clang_cc1 -triple aarch64_32-unknown-darwin -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ILP32
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=2 -mvscale-max=2 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=8 -mvscale-max=8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=16 -mvscale-max=16 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048
+// RUN: %clang_cc1 -triple aarch64_32-unknown-darwin -target-feature +sve -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ILP32
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/CodeGen/svboolx2_t.cpp b/clang/test/CodeGen/svboolx2_t.cpp
index 0b798afb8c642..70572d9563c2e 100644
--- a/clang/test/CodeGen/svboolx2_t.cpp
+++ b/clang/test/CodeGen/svboolx2_t.cpp
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s
 
 // CHECK-LABEL: @_Z3foo10svboolx2_t(
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGen/svboolx4_t.cpp b/clang/test/CodeGen/svboolx4_t.cpp
index d849896bad85f..dc4736856abae 100644
--- a/clang/test/CodeGen/svboolx4_t.cpp
+++ b/clang/test/CodeGen/svboolx4_t.cpp
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s
 
 // CHECK-LABEL: @_Z3foo10svboolx4_t(
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
index d0bde80a34057..835abc175b6ef 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp
@@ -1,17 +1,17 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 \
+// RUN:  -target-feature +sve -mvscale-min=1 -mvscale-max=1 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-128
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 \
+// RUN:  -target-feature +sve -mvscale-min=2 -mvscale-max=2 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-256
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -target-feature +sve -mvscale-min=4 -mvscale-max=4 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 \
+// RUN:  -target-feature +sve -mvscale-min=8 -mvscale-max=8 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=16 -mvscale-max=16 \
+// RUN:  -target-feature +sve -mvscale-min=16 -mvscale-max=16 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-2048
 
 #define N __ARM_FEATURE_SVE_BITS
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
index 713d0a2b1cb51..292c89d6a0371 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
@@ -1,8 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:   -target-feature +sve -target-feature +bf16 | FileCheck %s
+// RUN:   -target-feature +sve | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:   -target-feature +sve -target-feature +bf16 -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17
+// RUN:   -target-feature +sve -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17
 
 void f(__SVInt8_t, __SVInt8_t);
 void f(__SVInt16_t, __SVInt16_t);
diff --git a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
index dcc2c2e0af85d..66fc29a4a1696 100644
--- a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
+++ b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
@@ -1,17 +1,17 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 \
+// RUN:  -target-feature +sve -mvscale-min=1 -mvscale-max=1 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-128
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 \
+// RUN:  -target-feature +sve -mvscale-min=2 -mvscale-max=2 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-256
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 \
+// RUN:  -target-feature +sve -mvscale-min=4 -mvscale-max=4 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-512
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 \
+// RUN:  -target-feature +sve -mvscale-min=8 -mvscale-max=8 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-1024
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
-// RUN:  -target-feature +sve -target-feature +bf16 -mvscale-min=16 -mvscale-max=16 \
+// RUN:  -target-feature +sve -mvscale-min=16 -mvscale-max=16 \
 // RUN:  | FileCheck %s --check-prefix=CHECK-2048
 
 #define N __ARM_FEATURE_SVE_BITS
diff --git a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
index 2139a8ae98caf..a4ca0581c195b 100644
--- a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
+++ b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp
@@ -1,7 +1,6 @@
-// RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon  %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +bf16 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64-BF16
 
 typedef float float32_t;
 typedef double float64_t;
@@ -15,10 +14,7 @@ typedef signed char poly8_t;
 typedef short poly16_t;
 #endif
 typedef unsigned __INT64_TYPE__ uint64_t;
-
-#if defined(__ARM_FEATURE_BF16)
 typedef __bf16 bfloat16_t;
-#endif
 
 typedef __attribute__((neon_vector_type(2))) int int32x2_t;
 typedef __attribute__((neon_vector_type(4))) int int32x4_t;
@@ -35,10 +31,7 @@ typedef __attribute__((neon_vector_type(16))) mfloat8_t mfloat8x16_t;
 #endif
 typedef __attribute__((neon_polyvector_type(16))) poly8_t  poly8x16_t;
 typedef __attribute__((neon_polyvector_type(8)))  poly16_t poly16x8_t;
-
-#if defined(__ARM_FEATURE_BF16)
 typedef __attribute__((neon_vector_type(4))) __bf16 bfloat16x4_t;
-#endif
 
 // CHECK: 16__simd64_int32_t
 // CHECK-AARCH64: 11__Int32x2_t
@@ -85,11 +78,8 @@ void f10(poly16x8_t v) {}
 void f11(float64x2_t v) { }
 #endif
 
-#if defined(__ARM_FEATURE_BF16)
 // CHECK-AARCH64-BF16: 14__Bfloat16x4_t
 void f12(bfloat16x4_t v) {}
-#endif
-
 
 #ifdef __aarch64__
 // CHECK-AARCH64: 13__Mfloat8x8_t
diff --git a/clang/test/Index/index-builtin-sve.cpp b/clang/test/Index/index-builtin-sve.cpp
index cc148a21578fa..95fca71004b74 100644
--- a/clang/test/Index/index-builtin-sve.cpp
+++ b/clang/test/Index/index-builtin-sve.cpp
@@ -1,7 +1,7 @@
+// RUN: c-index-test -index-file %s --target=aarch64 -target-feature +sve -std=c++11 | FileCheck %s
+//
 void testSve(__SVInt8_t sve);
 // CHECK: USR: c:@F at testSve#@BT at __SVInt8_t#
 
 void testBf16(__bf16);
 // CHECK: USR: c:@F at testBf16#@BT at __bf16#
-
-// RUN: c-index-test -index-file %s --target=aarch64 -target-feature +bf16 -target-feature +sve -std=c++11 | FileCheck %s
diff --git a/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c b/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c
index 55cd8b772831a..a7d0d31274a1a 100644
--- a/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c
+++ b/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \
-// RUN:  -O2 -verify -fsyntax-only %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -O2 -verify -fsyntax-only %s
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
 
diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp
index 3fbcaf4a13d67..15be24c1e477c 100644
--- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp
+++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // RUN: %clang_cc1  -std=c++23 -triple aarch64-none-linux-gnu -target-feature +sve \
-// RUN:   -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +sve2 -target-feature +neon -Waarch64-sme-attributes -fsyntax-only -verify %s
+// RUN:   -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +sve2 -target-feature +neon -Waarch64-sme-attributes -fsyntax-only -verify %s
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c
index 1543e990dd042..77b8520f41dc1 100644
--- a/clang/test/Sema/aarch64-sme-func-attrs.c
+++ b/clang/test/Sema/aarch64-sme-func-attrs.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s
 
 // Valid attributes
 
diff --git a/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c b/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c
index 41d89869062ff..949b976f9cae8 100644
--- a/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c
+++ b/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c
@@ -1,9 +1,9 @@
 // Case 1: No vscale flags — should only produce warnings
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-noflags %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-noflags %s
 
 // Case 2: Explicit mismatch in vscale flags — should produce errors for 
 // streaming and non-streaming callers
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -mvscale-min=1 -mvscale-max=1 -mvscale-streaming-min=2 -mvscale-streaming-max=2 -verify=expected-flags %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -mvscale-min=1 -mvscale-max=1 -mvscale-streaming-min=2 -mvscale-streaming-max=2 -verify=expected-flags %s
 
 void sme_streaming_with_vl_arg(__SVInt8_t a) __arm_streaming;
 
diff --git a/clang/test/Sema/aarch64-sme2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2p1-diagnostics.c
index 2a25039e120b1..8a4a1e2f48155 100644
--- a/clang/test/Sema/aarch64-sme2p1-diagnostics.c
+++ b/clang/test/Sema/aarch64-sme2p1-diagnostics.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu  -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -fsyntax-only -verify %s
 
 // REQUIRES: aarch64-registered-target
 #include "arm_sme.h"
diff --git a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
index 06b1e8301ce49..d299114c9a3b2 100644
--- a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
-// RUN:    -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p1 -fsyntax-only -verify %s
 
 // REQUIRES: aarch64-registered-target
 
diff --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
index 25c35fbcbcc7b..b9c8380949562 100644
--- a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
+++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c
@@ -1,6 +1,6 @@
 // REQUIRES: aarch64-registered-target
 
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -fsyntax-only -verify %s
 
 #include <arm_sme.h>
 
diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c
index 9e9e72ef4254d..1f6b01aa87669 100644
--- a/clang/test/Sema/attr-arm-sve-vector-bits.c
+++ b/clang/test/Sema/attr-arm-sve-vector-bits.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=1 -mvscale-max=1 %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=2 -mvscale-max=2 -mvscale-streaming-min=2 %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected -mvscale-min=4 -mvscale-max=4 -mvscale-streaming-min=4 -mvscale-streaming-max=4 %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=8 -mvscale-max=8 -mvscale-streaming-min=4 -mvscale-streaming-max=8 %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=16 -mvscale-max=16 %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=1 -mvscale-max=1 %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=2 -mvscale-max=2 -mvscale-streaming-min=2 %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected -mvscale-min=4 -mvscale-max=4 -mvscale-streaming-min=4 -mvscale-streaming-max=4 %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=8 -mvscale-max=8 -mvscale-streaming-min=4 -mvscale-streaming-max=8 %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=16 -mvscale-max=16 %s
 
 #include <stdint.h>
 
diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
index 1127f31619e77..93326a0afa279 100644
--- a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
+++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s
 // expected-no-diagnostics
 
 #include <stdint.h>

>From 14840af5d694f40dbc8b1733330da2db4000335c Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 24 Jun 2026 05:41:38 -0500
Subject: [PATCH 14/42] [flang][OpenMP] Check that IF clause applies to at most
 one leaf (#205164)

This also allows placing the IF clause in the "allowedClauses" set for
all directives, instead of having it in "allowedOnceClauses" for some
directives and in "allowedClauses" for others.

The emitted diagnostic will show which constituent has multiple IF
clauses applying to it:
```
if.f90:4:35: error: At most one IF clause can apply to each directive constituent
    !$omp & if(target teams: x > 0) if(teams distribute: y > 0)
                                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^
if.f90:4:11: Previous IF clause applying to the TEAMS constituent
    !$omp & if(target teams: x > 0) if(teams distribute: y > 0)
            ^^^^^^^^^^^^^^^^^^^^^^^
```
---
 flang/lib/Semantics/check-omp-structure.cpp   | 115 ++++++++++++++----
 flang/lib/Semantics/check-omp-structure.h     |   5 +
 .../Semantics/OpenMP/device-constructs.f90    |   4 +-
 .../OpenMP/if-clause-45-suggestion.f90        |  18 +++
 flang/test/Semantics/OpenMP/if-clause-45.f90  |  29 ++---
 .../OpenMP/if-clause-50-suggestion.f90        |  14 +++
 flang/test/Semantics/OpenMP/if-clause-50.f90  |  32 +++--
 flang/test/Semantics/OpenMP/if-clause-60.f90  |  12 ++
 flang/test/Semantics/OpenMP/if-clause.f90     |  34 +++---
 llvm/include/llvm/Frontend/OpenMP/OMP.td      |  71 ++++++-----
 10 files changed, 223 insertions(+), 111 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90
 create mode 100644 flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90
 create mode 100644 flang/test/Semantics/OpenMP/if-clause-60.f90

diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index d87b2f1983de6..816b8fd2f149d 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3359,6 +3359,10 @@ void OmpStructureChecker::Leave(const parser::OmpEndDirective &x) {
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OmpClauseList &) {
+  ifLeafs_.clear();
+}
+
 // Clauses
 // Mainly categorized as
 // 1. Checks on 'OmpClauseList' from 'parse-tree.h'.
@@ -4317,6 +4321,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) {
     return false;
   }};
 
+  // The directive-name to which the clause applies. [Note: The directive-
+  // name-modifier is not necessarily a valid directive name, but that's how
+  // it's currently modeled.]
+  // This will be set only after other checks pass to avoid emitting irrelevant
+  // diagnostics.
+  llvm::omp::Directive appliesTo{llvm::omp::Directive::OMPD_unknown};
+
   if (!OmpVerifyModifiers(
           x.v, llvm::omp::OMPC_if, GetContext().clauseSource, context_)) {
     return;
@@ -4334,39 +4345,91 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) {
     std::string modName{desc.name.str()};
 
     if (!isConstituent(dir, sub)) {
-      context_
-          .Say(modifierSource,
-              "%s is not a constituent of the %s directive"_err_en_US, subName,
-              dirName)
-          .Attach(
-              GetContext().directiveSource, "Cannot apply to directive"_en_US);
+      context_.Say(modifierSource,
+          "%s is not a constituent of the %s directive"_err_en_US, subName,
+          dirName);
     } else {
-      static llvm::omp::Directive valid45[]{
-          llvm::omp::OMPD_cancel, //
-          llvm::omp::OMPD_parallel, //
-          /* OMP 5.0+ also allows OMPD_simd */
-          llvm::omp::OMPD_target, //
-          llvm::omp::OMPD_target_data, //
-          llvm::omp::OMPD_target_enter_data, //
-          llvm::omp::OMPD_target_exit_data, //
-          llvm::omp::OMPD_target_update, //
-          llvm::omp::OMPD_task, //
-          llvm::omp::OMPD_taskloop, //
-          /* OMP 5.2+ also allows OMPD_teams */
+      static OmpDirectiveSet valid45{
+          llvm::omp::Directive::OMPD_cancel, //
+          llvm::omp::Directive::OMPD_parallel, //
+          llvm::omp::Directive::OMPD_target, //
+          llvm::omp::Directive::OMPD_target_data, //
+          llvm::omp::Directive::OMPD_target_enter_data, //
+          llvm::omp::Directive::OMPD_target_exit_data, //
+          llvm::omp::Directive::OMPD_target_update, //
+          llvm::omp::Directive::OMPD_task, //
+          llvm::omp::Directive::OMPD_taskloop, //
       };
-      if (version < 50 && sub == llvm::omp::OMPD_simd) {
+      static OmpDirectiveSet valid50{
+          valid45 | OmpDirectiveSet{llvm::omp::Directive::OMPD_simd}};
+      // 5.1 is the same as 5.0.
+      static OmpDirectiveSet valid52{
+          valid50 | OmpDirectiveSet{llvm::omp::Directive::OMPD_teams}};
+      static OmpDirectiveSet valid60{valid52 |
+          OmpDirectiveSet{llvm::omp::Directive::OMPD_taskgraph,
+              /*TODO llvm::omp::Directive::OMPD_task_iteration*/}};
+
+      static auto minVersion{[&](llvm::omp::Directive d) {
+        if (valid45.test(d)) {
+          return 45;
+        }
+        if (valid50.test(d)) {
+          return 50;
+        }
+        if (valid52.test(d)) {
+          return 52;
+        }
+        if (valid60.test(d)) {
+          return 60;
+        }
+        return 0;
+      }};
+      static auto suggest{[&](unsigned v) -> std::string {
+        if (v != 0) {
+          return ", " + TryVersion(v);
+        } else {
+          return "";
+        }
+      }};
+
+      if (version <= 45 && !valid45.test(sub)) {
+        context_.Say(modifierSource,
+            "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName,
+            ThisVersion(version), suggest(minVersion(sub)));
+      } else if (version <= 51 && !valid50.test(sub)) {
         context_.Say(modifierSource,
-            "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName,
-            ThisVersion(version), TryVersion(50));
-      } else if (version < 52 && sub == llvm::omp::OMPD_teams) {
+            "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName,
+            ThisVersion(version), suggest(minVersion(sub)));
+      } else if (version <= 52 && !valid52.test(sub)) {
         context_.Say(modifierSource,
-            "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName,
-            ThisVersion(version), TryVersion(52));
-      } else if (!llvm::is_contained(valid45, sub) &&
-          sub != llvm::omp::OMPD_simd && sub != llvm::omp::OMPD_teams) {
+            "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName,
+            ThisVersion(version), suggest(minVersion(sub)));
+      } else if (!valid60.test(sub)) {
         context_.Say(modifierSource,
             "%s is not allowed as '%s' in %s"_err_en_US, subName, modName,
             ThisVersion(version));
+      } else {
+        appliesTo = sub;
+      }
+    }
+  } else {
+    appliesTo = GetContext().directive;
+  }
+
+  if (appliesTo != llvm::omp::Directive::OMPD_unknown) {
+    parser::CharBlock source{GetContext().clauseSource};
+    for (auto leaf : llvm::omp::getLeafConstructsOrSelf(appliesTo)) {
+      auto pair{ifLeafs_.try_emplace(leaf, source)};
+      if (!pair.second) {
+        std::string ifName{GetUpperName(llvm::omp::Clause::OMPC_if, version)};
+        context_
+            .Say(source,
+                "At most one %s clause can apply to each directive constituent"_err_en_US,
+                ifName)
+            .Attach(pair.first->second,
+                "Previous %s clause applying to the %s constituent"_en_US,
+                ifName, GetUpperName(leaf, version));
+        break;
       }
     }
   }
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 1283feb32ef5f..8d9b25fb2a11d 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -131,6 +131,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase {
   void Enter(const parser::OpenMPCriticalConstruct &);
   void Enter(const parser::OpenMPAtomicConstruct &);
 
+  void Enter(const parser::OmpClauseList &);
   void Leave(const parser::OmpClauseList &);
   void Enter(const parser::OmpClause &);
 
@@ -388,6 +389,10 @@ class OmpStructureChecker : public OmpStructureCheckerBase {
   int allocateDirectiveLevel_{0};
   parser::CharBlock visitedAtomicSource_;
 
+  // Maps each leaf construct an IF clause applies to (the leaves of its
+  // directive-name-modifier, or of the whole directive when there is no
+  // modifier) to that clause's source, used to diagnose duplicate clauses.
+  std::map<llvm::omp::Directive, parser::CharBlock> ifLeafs_;
   // Stack of nested DO loops and OpenMP constructs.
   // This is used to verify DO loop nest for DOACROSS, and branches into
   // and out of OpenMP constructs.
diff --git a/flang/test/Semantics/OpenMP/device-constructs.f90 b/flang/test/Semantics/OpenMP/device-constructs.f90
index db04e7db155ad..d74d720b2d35d 100644
--- a/flang/test/Semantics/OpenMP/device-constructs.f90
+++ b/flang/test/Semantics/OpenMP/device-constructs.f90
@@ -169,7 +169,7 @@ program main
   !ERROR: The device expression of the DEVICE clause must be a non-negative integer expression, 'omp_initial_device' (-1), or 'omp_invalid_device' (-2)
   !$omp target exit data map(delete:A) device(-3)
 
-  !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target enter data map(to:a) if(.true.) if(.false.)
 
   !ERROR: Only the ALLOC, TO, TOFROM map types are permitted for MAP clauses on the TARGET ENTER DATA directive
@@ -185,7 +185,7 @@ program main
 
   !$omp target update if(.true.) device(1) to(a) from(b) depend(inout:c) nowait
 
-  !ERROR: At most one IF clause can appear on the TARGET UPDATE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target update to(a) if(.true.) if(.false.)
 
   !ERROR: At most one DEVICE clause can appear on the TARGET UPDATE directive
diff --git a/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90 b/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90
new file mode 100644
index 0000000000000..98ac83d7f4999
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90
@@ -0,0 +1,18 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45
+
+subroutine f
+  !OK
+  !$omp parallel if(.false.)
+  !$omp end parallel
+
+  !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50
+  !$omp simd if(.true.)
+  do i = 1, 10
+  end do
+
+  !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52
+  !$omp teams if(.true.)
+  !$omp end teams
+
+  !No test for 6.0 because it requires a directive that is not in 4.5
+end
diff --git a/flang/test/Semantics/OpenMP/if-clause-45.f90 b/flang/test/Semantics/OpenMP/if-clause-45.f90
index b013a33094727..4f444907b1617 100644
--- a/flang/test/Semantics/OpenMP/if-clause-45.f90
+++ b/flang/test/Semantics/OpenMP/if-clause-45.f90
@@ -24,7 +24,7 @@ program main
   end do
   !$omp end distribute parallel do
 
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp distribute parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -79,7 +79,6 @@ program main
 
   !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50
   !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive
   !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50
   !$omp distribute simd if(.true.) if(simd: .false.)
   do i = 1, 10
@@ -112,7 +111,6 @@ program main
 
   !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50
   !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50
-  !ERROR: At most one IF clause can appear on the DO SIMD directive
   !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50
   !$omp do simd if(.true.) if(simd: .false.)
   do i = 1, 10
@@ -132,7 +130,7 @@ program main
   !$omp parallel if(target: .true.)
   !$omp end parallel
 
-  !ERROR: At most one IF clause can appear on the PARALLEL directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel if(.true.) if(parallel: .false.)
   !$omp end parallel
 
@@ -155,7 +153,7 @@ program main
   end do
   !$omp end parallel do
 
-  !ERROR: At most one IF clause can appear on the PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -194,7 +192,7 @@ program main
   !$omp parallel sections if(target: .true.)
   !$omp end parallel sections
 
-  !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel sections if(.true.) if(parallel: .false.)
   !$omp end parallel sections
 
@@ -211,7 +209,7 @@ program main
   !$omp parallel workshare if(target: .true.)
   !$omp end parallel workshare
 
-  !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel workshare if(.true.) if(parallel: .false.)
   !$omp end parallel workshare
 
@@ -240,7 +238,6 @@ program main
 
   !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50
   !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50
-  !ERROR: At most one IF clause can appear on the SIMD directive
   !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50
   !$omp simd if(.true.) if(simd: .false.)
   do i = 1, 10
@@ -260,7 +257,7 @@ program main
   !$omp target if(parallel: .true.)
   !$omp end target
 
-  !ERROR: At most one IF clause can appear on the TARGET directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target if(.true.) if(target: .false.)
   !$omp end target
 
@@ -277,7 +274,7 @@ program main
   !$omp target data map(tofrom: i) if(target: .true.)
   !$omp end target data
 
-  !ERROR: At most one IF clause can appear on the TARGET DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target data map(tofrom: i) if(.true.) if(target data: .false.)
   !$omp end target data
 
@@ -291,7 +288,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive
   !$omp target enter data map(to: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -304,7 +301,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive
   !$omp target exit data map(from: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -490,7 +487,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET UPDATE directive
   !$omp target update to(i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET UPDATE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target update to(i) if(.true.) if(target update: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -506,7 +503,7 @@ program main
   !$omp task if(target: .true.)
   !$omp end task
 
-  !ERROR: At most one IF clause can appear on the TASK directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp task if(.true.) if(task: .false.)
   !$omp end task
 
@@ -529,7 +526,7 @@ program main
   end do
   !$omp end taskloop
 
-  !ERROR: At most one IF clause can appear on the TASKLOOP directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp taskloop if(.true.) if(taskloop: .false.)
   do i = 1, 10
   end do
@@ -574,7 +571,6 @@ program main
 
   !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52
   !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52
-  !ERROR: At most one IF clause can appear on the TEAMS directive
   !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52
   !$omp teams if(.true.) if(teams: .false.)
   !$omp end teams
@@ -599,7 +595,6 @@ program main
   end do
   !$omp end teams distribute
 
-  !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive
   !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52
   !$omp teams distribute if(.true.) if(teams: .true.)
   do i = 1, 10
diff --git a/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90 b/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90
new file mode 100644
index 0000000000000..f8a4452d01951
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90
@@ -0,0 +1,14 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50
+
+subroutine f
+  !OK
+  !$omp simd if(.true.)
+  do i = 1, 10
+  end do
+
+  !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52
+  !$omp teams if(.true.)
+  !$omp end teams
+
+  !No test for 6.0 because it requires a directive that is not in 5.0
+end
diff --git a/flang/test/Semantics/OpenMP/if-clause-50.f90 b/flang/test/Semantics/OpenMP/if-clause-50.f90
index 3c385cdad9e65..23d7738949404 100644
--- a/flang/test/Semantics/OpenMP/if-clause-50.f90
+++ b/flang/test/Semantics/OpenMP/if-clause-50.f90
@@ -24,7 +24,7 @@ program main
   end do
   !$omp end distribute parallel do
 
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp distribute parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -72,7 +72,7 @@ program main
   end do
   !$omp end distribute simd
 
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp distribute simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -98,7 +98,7 @@ program main
   end do
   !$omp end do simd
 
-  !ERROR: At most one IF clause can appear on the DO SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp do simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -117,7 +117,7 @@ program main
   !$omp parallel if(target: .true.)
   !$omp end parallel
 
-  !ERROR: At most one IF clause can appear on the PARALLEL directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel if(.true.) if(parallel: .false.)
   !$omp end parallel
 
@@ -140,7 +140,7 @@ program main
   end do
   !$omp end parallel do
 
-  !ERROR: At most one IF clause can appear on the PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -178,7 +178,7 @@ program main
   !$omp parallel sections if(target: .true.)
   !$omp end parallel sections
 
-  !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel sections if(.true.) if(parallel: .false.)
   !$omp end parallel sections
 
@@ -195,7 +195,7 @@ program main
   !$omp parallel workshare if(target: .true.)
   !$omp end parallel workshare
 
-  !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel workshare if(.true.) if(parallel: .false.)
   !$omp end parallel workshare
 
@@ -218,7 +218,7 @@ program main
   end do
   !$omp end simd
 
-  !ERROR: At most one IF clause can appear on the SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -237,7 +237,7 @@ program main
   !$omp target if(parallel: .true.)
   !$omp end target
 
-  !ERROR: At most one IF clause can appear on the TARGET directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target if(.true.) if(target: .false.)
   !$omp end target
 
@@ -254,7 +254,7 @@ program main
   !$omp target data map(tofrom: i) if(target: .true.)
   !$omp end target data
 
-  !ERROR: At most one IF clause can appear on the TARGET DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target data map(tofrom: i) if(.true.) if(target data: .false.)
   !$omp end target data
 
@@ -268,7 +268,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive
   !$omp target enter data map(to: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -281,7 +281,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive
   !$omp target exit data map(from: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -463,7 +463,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET UPDATE directive
   !$omp target update to(i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET UPDATE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target update to(i) if(.true.) if(target update: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -479,7 +479,7 @@ program main
   !$omp task if(target: .true.)
   !$omp end task
 
-  !ERROR: At most one IF clause can appear on the TASK directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp task if(.true.) if(task: .false.)
   !$omp end task
 
@@ -502,7 +502,7 @@ program main
   end do
   !$omp end taskloop
 
-  !ERROR: At most one IF clause can appear on the TASKLOOP directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp taskloop if(.true.) if(taskloop: .false.)
   do i = 1, 10
   end do
@@ -546,7 +546,6 @@ program main
 
   !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52
   !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52
-  !ERROR: At most one IF clause can appear on the TEAMS directive
   !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52
   !$omp teams if(.true.) if(teams: .false.)
   !$omp end teams
@@ -571,7 +570,6 @@ program main
   end do
   !$omp end teams distribute
 
-  !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive
   !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52
   !$omp teams distribute if(.true.) if(teams: .true.)
   do i = 1, 10
diff --git a/flang/test/Semantics/OpenMP/if-clause-60.f90 b/flang/test/Semantics/OpenMP/if-clause-60.f90
new file mode 100644
index 0000000000000..3105bb3251e61
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/if-clause-60.f90
@@ -0,0 +1,12 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+
+subroutine f
+!ERROR: At most one IF clause can apply to each directive constituent
+  !$omp taskgraph if(.true.) if(.false.)
+  !$omp end taskgraph
+
+!ERROR: PARALLEL LOOP is not a constituent of the TEAMS LOOP directive
+  !$omp teams loop if(parallel loop: .false.)
+  do i = 1, 10
+  end do
+end
diff --git a/flang/test/Semantics/OpenMP/if-clause.f90 b/flang/test/Semantics/OpenMP/if-clause.f90
index 5e19c78a1ce76..ce11020e50f59 100644
--- a/flang/test/Semantics/OpenMP/if-clause.f90
+++ b/flang/test/Semantics/OpenMP/if-clause.f90
@@ -24,7 +24,7 @@ program main
   end do
   !$omp end distribute parallel do
 
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp distribute parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -72,7 +72,7 @@ program main
   end do
   !$omp end distribute simd
 
-  !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp distribute simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -98,7 +98,7 @@ program main
   end do
   !$omp end do simd
 
-  !ERROR: At most one IF clause can appear on the DO SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp do simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -117,7 +117,7 @@ program main
   !$omp parallel if(target: .true.)
   !$omp end parallel
 
-  !ERROR: At most one IF clause can appear on the PARALLEL directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel if(.true.) if(parallel: .false.)
   !$omp end parallel
 
@@ -140,7 +140,7 @@ program main
   end do
   !$omp end parallel do
 
-  !ERROR: At most one IF clause can appear on the PARALLEL DO directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel do if(.true.) if(parallel: .false.)
   do i = 1, 10
   end do
@@ -178,7 +178,7 @@ program main
   !$omp parallel sections if(target: .true.)
   !$omp end parallel sections
 
-  !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel sections if(.true.) if(parallel: .false.)
   !$omp end parallel sections
 
@@ -195,7 +195,7 @@ program main
   !$omp parallel workshare if(target: .true.)
   !$omp end parallel workshare
 
-  !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp parallel workshare if(.true.) if(parallel: .false.)
   !$omp end parallel workshare
 
@@ -218,7 +218,7 @@ program main
   end do
   !$omp end simd
 
-  !ERROR: At most one IF clause can appear on the SIMD directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp simd if(.true.) if(simd: .false.)
   do i = 1, 10
   end do
@@ -237,7 +237,7 @@ program main
   !$omp target if(parallel: .true.)
   !$omp end target
 
-  !ERROR: At most one IF clause can appear on the TARGET directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target if(.true.) if(target: .false.)
   !$omp end target
 
@@ -254,7 +254,7 @@ program main
   !$omp target data map(tofrom: i) if(target: .true.)
   !$omp end target data
 
-  !ERROR: At most one IF clause can appear on the TARGET DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target data map(tofrom: i) if(.true.) if(target data: .false.)
   !$omp end target data
 
@@ -268,7 +268,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive
   !$omp target enter data map(to: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -281,7 +281,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive
   !$omp target exit data map(from: i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -458,7 +458,7 @@ program main
   !ERROR: TARGET is not a constituent of the TARGET UPDATE directive
   !$omp target update to(i) if(target: .true.)
 
-  !ERROR: At most one IF clause can appear on the TARGET UPDATE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp target update to(i) if(.true.) if(target update: .false.)
 
   ! ----------------------------------------------------------------------------
@@ -474,7 +474,7 @@ program main
   !$omp task if(target: .true.)
   !$omp end task
 
-  !ERROR: At most one IF clause can appear on the TASK directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp task if(.true.) if(task: .false.)
   !$omp end task
 
@@ -497,7 +497,7 @@ program main
   end do
   !$omp end taskloop
 
-  !ERROR: At most one IF clause can appear on the TASKLOOP directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp taskloop if(.true.) if(taskloop: .false.)
   do i = 1, 10
   end do
@@ -535,7 +535,7 @@ program main
   !$omp teams if(target: .true.)
   !$omp end teams
 
-  !ERROR: At most one IF clause can appear on the TEAMS directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp teams if(.true.) if(teams: .false.)
   !$omp end teams
 
@@ -558,7 +558,7 @@ program main
   end do
   !$omp end teams distribute
 
-  !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive
+  !ERROR: At most one IF clause can apply to each directive constituent
   !$omp teams distribute if(.true.) if(teams: .true.)
   do i = 1, 10
   end do
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 65118c96bc2e8..679a944fc4358 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -761,9 +761,11 @@ def OMP_EndDeclareVariant : Directive<[Spelling<"end declare variant", 1, 52>,
   let languages = OMP_BeginDeclareVariant.languages;
 }
 def OMP_Cancel : Directive<[Spelling<"cancel">]> {
+  let allowedClauses = [
+    VersionedClause<OMPC_If>,
+  ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_CancellationConstructType>,
-    VersionedClause<OMPC_If>,
   ];
   let association = AS_None;
   let category = CA_Executable;
@@ -1074,6 +1076,7 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
@@ -1081,7 +1084,6 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
@@ -1168,6 +1170,7 @@ def OMP_Simd : Directive<[Spelling<"simd">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Aligned>,
     VersionedClause<OMPC_Allocate>,
+    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_NonTemporal, 50>,
@@ -1176,7 +1179,6 @@ def OMP_Simd : Directive<[Spelling<"simd">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
-    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_Order, 50>,
     VersionedClause<OMPC_SafeLen>,
     VersionedClause<OMPC_SimdLen>,
@@ -1215,6 +1217,7 @@ def OMP_Target : Directive<[Spelling<"target">]> {
     VersionedClause<OMPC_Depend>,
     VersionedClause<OMPC_DynGroupprivate, 61>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_InReduction, 50>,
     VersionedClause<OMPC_IsDevicePtr>,
@@ -1227,7 +1230,6 @@ def OMP_Target : Directive<[Spelling<"target">]> {
     VersionedClause<OMPC_Default, 60>,
     VersionedClause<OMPC_DefaultMap>,
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_OMPX_Bare>,
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
@@ -1239,10 +1241,12 @@ def OMP_Target : Directive<[Spelling<"target">]> {
 }
 def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>,
                                 Spelling<"target_data", 60>]> {
+  let allowedClauses = [
+    VersionedClause<OMPC_If>,
+  ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default, 60>,
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Transparent, 60>,
   ];
   let requiredClauses = [
@@ -1257,10 +1261,10 @@ def OMP_TargetEnterData : Directive<[Spelling<"target enter data", 1, 52>,
                                      Spelling<"target_enter_data", 60>]> {
   let allowedClauses = [
     VersionedClause<OMPC_Depend>,
+    VersionedClause<OMPC_If>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_Replayable, 60>,
   ];
@@ -1274,10 +1278,10 @@ def OMP_TargetExitData : Directive<[Spelling<"target exit data", 1, 52>,
                                     Spelling<"target_exit_data", 60>]> {
   let allowedClauses = [
     VersionedClause<OMPC_Depend>,
+    VersionedClause<OMPC_If>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_Replayable, 60>,
   ];
@@ -1292,11 +1296,11 @@ def OMP_TargetUpdate : Directive<[Spelling<"target update", 1, 52>,
   let allowedClauses = [
     VersionedClause<OMPC_Depend>,
     VersionedClause<OMPC_From>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_To>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_Replayable, 60>,
   ];
@@ -1309,6 +1313,7 @@ def OMP_Task : Directive<[Spelling<"task">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Depend>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_InReduction>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Shared>,
@@ -1317,7 +1322,6 @@ def OMP_Task : Directive<[Spelling<"task">]> {
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Detach, 50>,
     VersionedClause<OMPC_Final>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_Priority>,
     VersionedClause<OMPC_Replayable, 60>,
@@ -1329,10 +1333,12 @@ def OMP_Task : Directive<[Spelling<"task">]> {
   let category = CA_Executable;
 }
 def OMP_Taskgraph : Directive<[Spelling<"taskgraph">]> {
+  let allowedClauses = [
+    VersionedClause<OMPC_If>,
+  ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_GraphId, 60>,
     VersionedClause<OMPC_GraphReset, 60>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoGroup>,
   ];
   let association = AS_Block;
@@ -1351,6 +1357,7 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
     VersionedClause<OMPC_InReduction>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
@@ -1360,7 +1367,6 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> {
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Final>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_NoGroup>,
     VersionedClause<OMPC_Priority>,
@@ -1396,6 +1402,7 @@ def OMP_Teams : Directive<[Spelling<"teams">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_DynGroupprivate, 61>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If, 52>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
@@ -1403,7 +1410,6 @@ def OMP_Teams : Directive<[Spelling<"teams">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If, 52>,
     VersionedClause<OMPC_NumTeams>,
     VersionedClause<OMPC_ThreadLimit>,
   ];
@@ -1494,6 +1500,7 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_Private>,
@@ -1504,7 +1511,6 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> {
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_Order, 50>,
@@ -1554,6 +1560,7 @@ def OMP_DistributeParallelFor
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -1564,7 +1571,6 @@ def OMP_DistributeParallelFor
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_Order, 50>,
@@ -1615,6 +1621,7 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_NonTemporal, 50>,
@@ -1625,7 +1632,6 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> {
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_Order, 50>,
@@ -1642,6 +1648,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Aligned>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_Private>,
@@ -1649,7 +1656,6 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
-    VersionedClause<OMPC_If, 50>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_Order, 50>,
     VersionedClause<OMPC_Ordered>,
@@ -1700,6 +1706,7 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> {
     VersionedClause<OMPC_Depend>,
     VersionedClause<OMPC_DynGroupprivate, 61>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_IsDevicePtr>,
     VersionedClause<OMPC_HasDeviceAddr, 51>,
     VersionedClause<OMPC_LastPrivate>,
@@ -1716,7 +1723,6 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> {
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_DefaultMap>,
     VersionedClause<OMPC_Device>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NoWait>,
     VersionedClause<OMPC_OMPX_DynCGroupMem>,
     VersionedClause<OMPC_Order>,
@@ -1729,6 +1735,7 @@ def OMP_MaskedTaskloop : Directive<[Spelling<"masked taskloop">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_InReduction>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Private>,
@@ -1741,7 +1748,6 @@ def OMP_MaskedTaskloop : Directive<[Spelling<"masked taskloop">]> {
     VersionedClause<OMPC_Filter>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_NoGroup>,
     VersionedClause<OMPC_NumTasks>,
@@ -1756,6 +1762,7 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> {
     VersionedClause<OMPC_Aligned>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_InReduction>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
@@ -1770,7 +1777,6 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> {
     VersionedClause<OMPC_Filter>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_NoGroup>,
     VersionedClause<OMPC_NumTasks>,
@@ -1787,6 +1793,7 @@ def OMP_MasterTaskloop : Directive<[Spelling<"master taskloop">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_InReduction>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Private>,
@@ -1798,7 +1805,6 @@ def OMP_MasterTaskloop : Directive<[Spelling<"master taskloop">]> {
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_NoGroup>,
     VersionedClause<OMPC_NumTasks>,
@@ -1813,6 +1819,7 @@ def OMP_MasterTaskloopSimd : Directive<[Spelling<"master taskloop simd">]> {
     VersionedClause<OMPC_Aligned>,
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_InReduction>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
@@ -1826,7 +1833,6 @@ def OMP_MasterTaskloopSimd : Directive<[Spelling<"master taskloop simd">]> {
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_NoGroup>,
     VersionedClause<OMPC_NumTasks>,
@@ -1844,6 +1850,7 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> {
     VersionedClause<OMPC_Allocate, 50>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_Private>,
@@ -1853,7 +1860,6 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> {
   let allowedOnceClauses = [
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_Order, 50>,
@@ -1962,6 +1968,7 @@ def OMP_parallel_loop : Directive<[Spelling<"parallel loop">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -1972,7 +1979,6 @@ def OMP_parallel_loop : Directive<[Spelling<"parallel loop">]> {
     VersionedClause<OMPC_Bind, 50>,
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_Order>,
@@ -2010,6 +2016,7 @@ def OMP_ParallelMaskedTaskloop
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -2022,7 +2029,6 @@ def OMP_ParallelMaskedTaskloop
     VersionedClause<OMPC_Filter>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NoGroup>,
@@ -2043,6 +2049,7 @@ def OMP_ParallelMaskedTaskloopSimd
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_NonTemporal, 50>,
@@ -2057,7 +2064,6 @@ def OMP_ParallelMaskedTaskloopSimd
     VersionedClause<OMPC_Filter>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NoGroup>,
@@ -2079,6 +2085,7 @@ def OMP_ParallelMaster : Directive<[Spelling<"parallel master">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
@@ -2086,7 +2093,6 @@ def OMP_ParallelMaster : Directive<[Spelling<"parallel master">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
@@ -2101,6 +2107,7 @@ def OMP_ParallelMasterTaskloop
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -2112,7 +2119,6 @@ def OMP_ParallelMasterTaskloop
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NoGroup>,
@@ -2133,6 +2139,7 @@ def OMP_ParallelMasterTaskloopSimd
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_Linear>,
     VersionedClause<OMPC_NonTemporal, 50>,
@@ -2146,7 +2153,6 @@ def OMP_ParallelMasterTaskloopSimd
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_Final>,
     VersionedClause<OMPC_GrainSize>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Mergeable>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NoGroup>,
@@ -2168,6 +2174,7 @@ def OMP_ParallelSections : Directive<[Spelling<"parallel sections">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -2176,7 +2183,6 @@ def OMP_ParallelSections : Directive<[Spelling<"parallel sections">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
@@ -2190,13 +2196,13 @@ def OMP_ParallelWorkshare : Directive<[Spelling<"parallel workshare">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_Copyin>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
     VersionedClause<OMPC_Shared>,
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_Message, 60>,
     VersionedClause<OMPC_NumThreads>,
     VersionedClause<OMPC_ProcBind>,
@@ -2833,6 +2839,7 @@ def OMP_TeamsDistribute : Directive<[Spelling<"teams distribute">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_DynGroupprivate, 61>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
@@ -2843,7 +2850,6 @@ def OMP_TeamsDistribute : Directive<[Spelling<"teams distribute">]> {
     VersionedClause<OMPC_Collapse>,
     VersionedClause<OMPC_Default>,
     VersionedClause<OMPC_DistSchedule>,
-    VersionedClause<OMPC_If>,
     VersionedClause<OMPC_NumTeams>,
     VersionedClause<OMPC_Order, 50>,
     VersionedClause<OMPC_ThreadLimit>,
@@ -3016,6 +3022,7 @@ def OMP_TeamsWorkdistribute : Directive<[Spelling<"teams workdistribute">]> {
   let allowedClauses = [
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If, 52>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,
     VersionedClause<OMPC_Reduction>,
@@ -3023,7 +3030,6 @@ def OMP_TeamsWorkdistribute : Directive<[Spelling<"teams workdistribute">]> {
   ];
   let allowedOnceClauses = [
     VersionedClause<OMPC_Default>,
-    VersionedClause<OMPC_If, 52>,
     VersionedClause<OMPC_NumTeams>,
     VersionedClause<OMPC_ThreadLimit>,
   ];
@@ -3036,6 +3042,7 @@ def OMP_teams_loop : Directive<[Spelling<"teams loop">]> {
     VersionedClause<OMPC_Allocate>,
     VersionedClause<OMPC_DynGroupprivate, 61>,
     VersionedClause<OMPC_FirstPrivate>,
+    VersionedClause<OMPC_If, 52>,
     VersionedClause<OMPC_LastPrivate>,
     VersionedClause<OMPC_OMPX_Attribute>,
     VersionedClause<OMPC_Private>,

>From 34ed491f6375a39248f71a98eaab5810dfc1324e Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 24 Jun 2026 12:42:58 +0200
Subject: [PATCH 15/42] [SCEV] Infer addrec nowrap flags during range analysis
 (#202964)

When we're computing the range of the addrec, we already have to reason
about whether it wraps, so we may as well determine the nowrap flags at
the same time.

This is more precise than the previous logic that took the addrec range
and checked whether adding a step to it does not wrap. For example, an
`{0,+,1}` addrec with a full range can still be non-wrapping.

Note that I removed some assertions in the SCEV printer which checked that
predicated exit counts actually have predicates. Due to SCEV's query order
dependence, a differing predicated exit count without any predicates can
occur (even prior to this change); see for example
https://llvm.godbolt.org/z/cWK1MMEqv. While this indicates suboptimal
results, it's not a bug, and we should not assert.

Fixes https://github.com/llvm/llvm-project/issues/200788.
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |  13 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         | 130 +++++----
 .../Delinearization/global_array_bounds.ll    |   2 +-
 .../Analysis/DependenceAnalysis/Banerjee.ll   |  22 +-
 .../Analysis/DependenceAnalysis/BasePtrBug.ll |   2 +-
 .../DependenceAnalysis/Constraints.ll         |  36 +--
 .../DependenceAnalysis/DifferentOffsets.ll    |   2 +-
 .../NonCanonicalizedSubscript.ll              |   2 +-
 .../DependenceAnalysis/Propagating.ll         |  20 +-
 .../clamped-access-pattern.ll                 |   6 +-
 .../inbounds-gep-in-predicated-blocks.ll      |  10 +-
 .../nssw-predicate-implied.ll                 |  52 ++--
 .../LoopAccessAnalysis/symbolic-stride.ll     |  12 +-
 ...drec-computed-during-addrec-calculation.ll |   2 +-
 .../backedge-taken-count-guard-info.ll        |   2 +-
 .../ScalarEvolution/becount-invalidation.ll   |   4 +-
 .../ScalarEvolution/different-loops-recs.ll   |  32 +--
 .../ScalarEvolution/exit-count-non-strict.ll  |  30 +-
 .../ScalarEvolution/exit-count-select-safe.ll | 108 ++++----
 .../ScalarEvolution/incorrect-exit-count.ll   |  14 +-
 .../increasing-or-decreasing-iv.ll            |   6 +-
 .../Analysis/ScalarEvolution/limit-depth.ll   |   2 +-
 ...ge-taken-count-guard-info-operand-order.ll |   2 +-
 ...en-count-guard-info-rewrite-expressions.ll |   4 +-
 .../max-backedge-taken-count-guard-info.ll    |   2 +-
 .../ScalarEvolution/mul-udiv-folds.ll         |   4 +-
 .../test/Analysis/ScalarEvolution/pr123550.ll |   8 +-
 llvm/test/Analysis/ScalarEvolution/pr22641.ll |   2 +-
 .../test/Analysis/ScalarEvolution/ptrtoint.ll |   4 +-
 .../Analysis/ScalarEvolution/sext-iv-2.ll     |   4 +-
 .../test/Analysis/ScalarEvolution/sext-mul.ll |   8 +-
 .../trip-count-negative-stride.ll             |  12 +-
 .../ScalarEvolution/umin-umax-folds.ll        |  12 +-
 llvm/test/CodeGen/PowerPC/mma-intrinsics.ll   | 212 +++++++--------
 .../IndVarSimplify/eliminate-exit-no-dl.ll    |   3 +-
 .../fixed-size-no-signed-wrap.ll              |  32 ++-
 .../RISCV/masked_gather_scatter.ll            |  39 +--
 .../LoopVectorize/X86/cast-costs.ll           |  29 +-
 .../LoopVectorize/iv-select-cmp-no-wrap.ll    |  22 +-
 .../LoopVectorize/iv-select-cmp-trunc.ll      | 256 +++++++++---------
 ...conditional_surrounding_non_affine_loop.ll |   2 +-
 41 files changed, 555 insertions(+), 611 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 37da037ffcae8..1e09dbc3db5f1 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1983,10 +1983,11 @@ class ScalarEvolution {
   /// operands iteratively first.
   const ConstantRange &getRangeRefIter(const SCEV *S, RangeSignHint Hint);
 
-  /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}.
-  /// Helper for \c getRange.
-  ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
-                                    const APInt &MaxBECount);
+  /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step},
+  /// and whether it may wrap. Helper for \c getRange.
+  std::pair<ConstantRange, SCEV::NoWrapFlags>
+  getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
+                      const APInt &MaxBECount);
 
   /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
   /// Start,+,\p Step}<nw>.
@@ -2386,8 +2387,8 @@ class ScalarEvolution {
   bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
                                  const Loop *L);
 
-  /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
-  SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
+  /// Try to infer NSW or NUW on \p AR relying on ConstantRange manipulation.
+  void inferNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
 
   /// Try to prove NSW on \p AR by proving facts about conditions known  on
   /// entry and backedge.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 6c419a9895ef5..2129c8667cc6c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5244,14 +5244,17 @@ class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
 
 } // end anonymous namespace
 
-SCEV::NoWrapFlags
-ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
+void ScalarEvolution::inferNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
   if (!AR->isAffine())
-    return SCEV::FlagAnyWrap;
+    return;
 
-  using OBO = OverflowingBinaryOperator;
+  // Force computation of ranges, which will also perform range-based flag
+  // inference.
+  if (!AR->hasNoSignedWrap())
+    (void)getSignedRange(AR);
 
-  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
+  if (!AR->hasNoUnsignedWrap())
+    (void)getUnsignedRange(AR);
 
   if (!AR->hasNoSelfWrap()) {
     const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop());
@@ -5261,31 +5264,9 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
       unsigned NoOverflowBitWidth =
         BECountAP.getActiveBits() + StepCR.getMinSignedBits();
       if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType()))
-        Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW);
+        const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
     }
   }
-
-  if (!AR->hasNoSignedWrap()) {
-    ConstantRange AddRecRange = getSignedRange(AR);
-    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));
-
-    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
-        Instruction::Add, IncRange, OBO::NoSignedWrap);
-    if (NSWRegion.contains(AddRecRange))
-      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
-  }
-
-  if (!AR->hasNoUnsignedWrap()) {
-    ConstantRange AddRecRange = getUnsignedRange(AR);
-    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));
-
-    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
-        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
-    if (NUWRegion.contains(AddRecRange))
-      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
-  }
-
-  return Result;
 }
 
 SCEV::NoWrapFlags
@@ -5947,10 +5928,8 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
   const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
   insertValueToMap(PN, PHISCEV);
 
-  if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
-    setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR),
-                   (AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR)));
-  }
+  if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV))
+    inferNoWrapViaConstantRanges(AR);
 
   // We can add Flags to the post-inc expression only if we
   // know that it is *undefined behavior* for BEValueV to
@@ -6077,11 +6056,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
         forgetMemoizedResults({SymbolicName});
         insertValueToMap(PN, PHISCEV);
 
-        if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
-          setNoWrapFlags(
-              const_cast<SCEVAddRecExpr *>(AR),
-              (AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR)));
-        }
+        if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV))
+          inferNoWrapViaConstantRanges(AR);
 
         // We can add Flags to the post-inc expression only if we
         // know that it is *undefined behavior* for BEValueV to
@@ -7042,10 +7018,11 @@ const ConstantRange &ScalarEvolution::getRangeRef(
           MaxBECount = MaxBECount.zext(BitWidth);
 
         if (MaxBECount.getBitWidth() == BitWidth) {
-          auto RangeFromAffine = getRangeForAffineAR(
+          auto [RangeFromAffine, Flags] = getRangeForAffineAR(
               AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount);
           ConservativeResult =
               ConservativeResult.intersectWith(RangeFromAffine, RangeType);
+          const_cast<SCEVAddRecExpr *>(AddRec)->setNoWrapFlags(Flags);
 
           auto RangeFromFactoring = getRangeViaFactoring(
               AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount);
@@ -7223,24 +7200,24 @@ const ConstantRange &ScalarEvolution::getRangeRef(
 // Given a StartRange, Step and MaxBECount for an expression compute a range of
 // values that the expression can take. Initially, the expression has a value
 // from StartRange and then is changed by Step up to MaxBECount times. Signed
-// argument defines if we treat Step as signed or unsigned.
-static ConstantRange getRangeForAffineARHelper(APInt Step,
-                                               const ConstantRange &StartRange,
-                                               const APInt &MaxBECount,
-                                               bool Signed) {
+// argument defines if we treat Step as signed or unsigned. The second return
+// value indicates that no wrapping occurred.
+static std::pair<ConstantRange, bool>
+getRangeForAffineARHelper(APInt Step, const ConstantRange &StartRange,
+                          const APInt &MaxBECount, bool Signed) {
   unsigned BitWidth = Step.getBitWidth();
   assert(BitWidth == StartRange.getBitWidth() &&
          BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths");
   // If either Step or MaxBECount is 0, then the expression won't change, and we
   // just need to return the initial range.
   if (Step == 0 || MaxBECount == 0)
-    return StartRange;
+    return {StartRange, true};
 
   // If we don't know anything about the initial value (i.e. StartRange is
   // FullRange), then we don't know anything about the final range either.
   // Return FullRange.
   if (StartRange.isFullSet())
-    return ConstantRange::getFull(BitWidth);
+    return {ConstantRange::getFull(BitWidth), false};
 
   // If Step is signed and negative, then we use its absolute value, but we also
   // note that we're moving in the opposite direction.
@@ -7256,7 +7233,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   // Check if Offset is more than full span of BitWidth. If it is, the
   // expression is guaranteed to overflow.
   if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
-    return ConstantRange::getFull(BitWidth);
+    return {ConstantRange::getFull(BitWidth), false};
 
   // Offset is by how much the expression can change. Checks above guarantee no
   // overflow here.
@@ -7268,14 +7245,28 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   // if the expression is decreasing and will be increased by Offset otherwise.
   APInt StartLower = StartRange.getLower();
   APInt StartUpper = StartRange.getUpper() - 1;
-  APInt MovedBoundary = Descending ? (StartLower - std::move(Offset))
-                                   : (StartUpper + std::move(Offset));
+  bool Overflow;
+  APInt MovedBoundary;
+  if (Signed) {
+    // This does not use sadd_ov, as we want to check overflow for a signed
+    // start with an unsigned offset.
+    if (Descending) {
+      MovedBoundary = StartLower - std::move(Offset);
+      Overflow = MovedBoundary.sgt(StartLower) || StartRange.isSignWrappedSet();
+    } else {
+      MovedBoundary = StartUpper + std::move(Offset);
+      Overflow = MovedBoundary.slt(StartUpper) || StartRange.isSignWrappedSet();
+    }
+  } else {
+    MovedBoundary = StartUpper.uadd_ov(std::move(Offset), Overflow);
+    Overflow |= StartRange.isWrappedSet();
+  }
 
   // It's possible that the new minimum/maximum value will fall into the initial
   // range (due to wrap around). This means that the expression can take any
   // value in this bitwidth, and we have to return full range.
   if (StartRange.contains(MovedBoundary))
-    return ConstantRange::getFull(BitWidth);
+    return {ConstantRange::getFull(BitWidth), false};
 
   APInt NewLower =
       Descending ? std::move(MovedBoundary) : std::move(StartLower);
@@ -7284,12 +7275,13 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
   NewUpper += 1;
 
   // No overflow detected, return [StartLower, StartUpper + Offset + 1) range.
-  return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper));
+  return {ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)),
+          !Overflow};
 }
 
-ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
-                                                   const SCEV *Step,
-                                                   const APInt &MaxBECount) {
+std::pair<ConstantRange, SCEV::NoWrapFlags>
+ScalarEvolution::getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
+                                     const APInt &MaxBECount) {
   assert(getTypeSizeInBits(Start->getType()) ==
              getTypeSizeInBits(Step->getType()) &&
          getTypeSizeInBits(Start->getType()) == MaxBECount.getBitWidth() &&
@@ -7301,19 +7293,26 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
 
   // If Step can be both positive and negative, we need to find ranges for the
   // maximum absolute step values in both directions and union them.
-  ConstantRange SR = getRangeForAffineARHelper(
-      StepSRange.getSignedMin(), StartSRange, MaxBECount, /* Signed = */ true);
-  SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
-                                              StartSRange, MaxBECount,
-                                              /* Signed = */ true));
+  auto [SR1, NSW1] = getRangeForAffineARHelper(
+      StepSRange.getSignedMin(), StartSRange, MaxBECount, /*Signed=*/true);
+  auto [SR2, NSW2] = getRangeForAffineARHelper(StepSRange.getSignedMax(),
+                                               StartSRange, MaxBECount,
+                                               /*Signed=*/true);
+  ConstantRange SR = SR1.unionWith(SR2);
 
   // Next, consider step unsigned.
-  ConstantRange UR = getRangeForAffineARHelper(
+  auto [UR, NUW] = getRangeForAffineARHelper(
       getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECount,
-      /* Signed = */ false);
+      /*Signed=*/false);
+
+  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
+  if (NUW)
+    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+  if (NSW1 && NSW2)
+    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
 
   // Finally, intersect signed and unsigned ranges.
-  return SR.intersectWith(UR, ConstantRange::Smallest);
+  return {SR.intersectWith(UR, ConstantRange::Smallest), Flags};
 }
 
 ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
@@ -7491,9 +7490,9 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
   const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
 
   ConstantRange TrueRange =
-      this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount);
+      this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount).first;
   ConstantRange FalseRange =
-      this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount);
+      this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount).first;
 
   return TrueRange.unionWith(FalseRange);
 }
@@ -14196,7 +14195,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   SmallVector<const SCEVPredicate *, 4> Preds;
   auto *PBT = SE->getPredicatedBackedgeTakenCount(L, Preds);
   if (PBT != BTC) {
-    assert(!Preds.empty() && "Different predicated BTC, but no predicates");
     OS << "Loop ";
     L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
     OS << ": ";
@@ -14215,8 +14213,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   auto *PredConstantMax =
       SE->getPredicatedConstantMaxBackedgeTakenCount(L, Preds);
   if (PredConstantMax != ConstantBTC) {
-    assert(!Preds.empty() &&
-           "different predicated constant max BTC but no predicates");
     OS << "Loop ";
     L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
     OS << ": ";
@@ -14235,8 +14231,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   auto *PredSymbolicMax =
       SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds);
   if (SymbolicBTC != PredSymbolicMax) {
-    assert(!Preds.empty() &&
-           "Different predicated symbolic max BTC, but no predicates");
     OS << "Loop ";
     L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
     OS << ": ";
diff --git a/llvm/test/Analysis/Delinearization/global_array_bounds.ll b/llvm/test/Analysis/Delinearization/global_array_bounds.ll
index 2e13fd66eb930..218038f9a9ac4 100644
--- a/llvm/test/Analysis/Delinearization/global_array_bounds.ll
+++ b/llvm/test/Analysis/Delinearization/global_array_bounds.ll
@@ -19,7 +19,7 @@ define void @test_2d_array(i64 %i, i64 %j, i64 %N, i64 %M) {
 ; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,80}<%for.i>,+,4}<%for.j>
 ; CHECK-NEXT:  Base offset: @test_array_10x20
 ; CHECK-NEXT:  ArrayDecl[UnknownSize][20] with elements of 4 bytes.
-; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
 ; CHECK-NEXT:  Delinearization validation: Failed
 ;
 entry:
diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
index 6572a8bc0cadb..bb89ad55554ff 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -52,11 +52,11 @@ define void @banerjee0(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; DELIN-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
 ; DELIN-NEXT:    da analyze - confused!
 ; DELIN-NEXT:  Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
-; DELIN-NEXT:    da analyze - input [0 *]!
+; DELIN-NEXT:    da analyze - none!
 ; DELIN-NEXT:  Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
 ; DELIN-NEXT:    da analyze - confused!
 ; DELIN-NEXT:  Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
-; DELIN-NEXT:    da analyze - output [0 *]!
+; DELIN-NEXT:    da analyze - none!
 ;
 entry:
   br label %for.cond1.preheader
@@ -802,11 +802,11 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [<= 0|<]!
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
-; CHECK-NEXT:    da analyze - input [* *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
@@ -816,11 +816,11 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
 ; NORMALIZE-NEXT:    da analyze - output [* *]!
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
-; NORMALIZE-NEXT:    da analyze - flow [* *|<]!
+; NORMALIZE-NEXT:    da analyze - flow [<= 0|<]!
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; NORMALIZE-NEXT:    da analyze - confused!
 ; NORMALIZE-NEXT:  Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
-; NORMALIZE-NEXT:    da analyze - input [* *]!
+; NORMALIZE-NEXT:    da analyze - none!
 ; NORMALIZE-NEXT:  Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; NORMALIZE-NEXT:    da analyze - confused!
 ; NORMALIZE-NEXT:  Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
@@ -830,7 +830,7 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; DELIN-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
 ; DELIN-NEXT:    da analyze - output [* *]!
 ; DELIN-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
-; DELIN-NEXT:    da analyze - flow [* *|<]!
+; DELIN-NEXT:    da analyze - flow [<= 0|<]!
 ; DELIN-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; DELIN-NEXT:    da analyze - confused!
 ; DELIN-NEXT:  Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8
@@ -888,11 +888,11 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [<> 0]!
 ; CHECK-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8
-; CHECK-NEXT:    da analyze - input [* *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
@@ -902,11 +902,11 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8
 ; NORMALIZE-NEXT:    da analyze - none!
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8
-; NORMALIZE-NEXT:    da analyze - flow [* *|<]!
+; NORMALIZE-NEXT:    da analyze - flow [<> 0]!
 ; NORMALIZE-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; NORMALIZE-NEXT:    da analyze - confused!
 ; NORMALIZE-NEXT:  Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8
-; NORMALIZE-NEXT:    da analyze - input [* *]!
+; NORMALIZE-NEXT:    da analyze - none!
 ; NORMALIZE-NEXT:  Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
 ; NORMALIZE-NEXT:    da analyze - confused!
 ; NORMALIZE-NEXT:  Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8
diff --git a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
index b4eba4d35b2f4..dd35508b93ddd 100644
--- a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
@@ -22,7 +22,7 @@ define void @test1(ptr nocapture %A, ptr nocapture %B, i32 %N) #0 {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %gep.0, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: %1 = load i32, ptr %gep.1, align 4
-; CHECK-NEXT:    da analyze - input [*]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %add, ptr %gep.B, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll
index e4063186ac73e..f8f8c5d7f5501 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll
@@ -37,15 +37,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %out_l.promoted = load i32, ptr @out_l, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - anti [|<]!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: store i32 0, ptr %13, align 4
-; CHECK-NEXT:    da analyze - output [* * *]!
+; CHECK-NEXT:    da analyze - output [S * *]!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: %18 = load i32, ptr %17, align 4
-; CHECK-NEXT:    da analyze - flow [* * *|<]!
+; CHECK-NEXT:    da analyze - flow [S * *|<]!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: %20 = load i32, ptr %19, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: %23 = load i32, ptr %22, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: store i32 %24, ptr %25, align 4
-; CHECK-NEXT:    da analyze - output [* * *|<]!
+; CHECK-NEXT:    da analyze - output [S * *|<]!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: %27 = load i32, ptr %26, align 4
 ; CHECK-NEXT:    da analyze - flow [* * *|<]!
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: %29 = load i32, ptr %28, align 4
@@ -57,15 +57,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: store i32 0, ptr %13, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: %18 = load i32, ptr %17, align 4
-; CHECK-NEXT:    da analyze - input [* * * *]!
+; CHECK-NEXT:    da analyze - input [S * * *]!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: %20 = load i32, ptr %19, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: %23 = load i32, ptr %22, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %24, ptr %25, align 4
-; CHECK-NEXT:    da analyze - anti [* * * *|<]!
+; CHECK-NEXT:    da analyze - anti [S * * *|<]!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: %27 = load i32, ptr %26, align 4
-; CHECK-NEXT:    da analyze - input [* * *|<]!
+; CHECK-NEXT:    da analyze - input [S * *|<]!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: %29 = load i32, ptr %28, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %30, ptr %31, align 4
@@ -75,9 +75,9 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: %20 = load i32, ptr %19, align 4
-; CHECK-NEXT:    da analyze - input [* * * *]!
+; CHECK-NEXT:    da analyze - input [S * S *]!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: %23 = load i32, ptr %22, align 4
-; CHECK-NEXT:    da analyze - input [* * * *|<]!
+; CHECK-NEXT:    da analyze - input [S * * *|<]!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %24, ptr %25, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: %27 = load i32, ptr %26, align 4
@@ -87,11 +87,11 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %30, ptr %31, align 4
 ; CHECK-NEXT:    da analyze - anti [* * *|<]!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: %32 = load i32, ptr %6, align 4
-; CHECK-NEXT:    da analyze - input [*|<]!
+; CHECK-NEXT:    da analyze - input [S|<]!
 ; CHECK-NEXT:  Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: %23 = load i32, ptr %22, align 4
-; CHECK-NEXT:    da analyze - input [* * * *]!
+; CHECK-NEXT:    da analyze - input [S * * *]!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %24, ptr %25, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: %27 = load i32, ptr %26, align 4
@@ -99,15 +99,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: %29 = load i32, ptr %28, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %30, ptr %31, align 4
-; CHECK-NEXT:    da analyze - anti [* * *|<]!
+; CHECK-NEXT:    da analyze - anti [S * *|<]!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: %32 = load i32, ptr %6, align 4
-; CHECK-NEXT:    da analyze - input [*|<]!
+; CHECK-NEXT:    da analyze - input [S|<]!
 ; CHECK-NEXT:  Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %24, ptr %25, align 4
-; CHECK-NEXT:    da analyze - output [* * * *]!
+; CHECK-NEXT:    da analyze - output [S * * *]!
 ; CHECK-NEXT:  Src: store i32 %24, ptr %25, align 4 --> Dst: %27 = load i32, ptr %26, align 4
-; CHECK-NEXT:    da analyze - flow [* * *|<]!
+; CHECK-NEXT:    da analyze - flow [S * *|<]!
 ; CHECK-NEXT:  Src: store i32 %24, ptr %25, align 4 --> Dst: %29 = load i32, ptr %28, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %30, ptr %31, align 4
@@ -117,7 +117,7 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %27 = load i32, ptr %26, align 4 --> Dst: %27 = load i32, ptr %26, align 4
-; CHECK-NEXT:    da analyze - input [* * *]!
+; CHECK-NEXT:    da analyze - input [S * *]!
 ; CHECK-NEXT:  Src: %27 = load i32, ptr %26, align 4 --> Dst: %29 = load i32, ptr %28, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %27 = load i32, ptr %26, align 4 --> Dst: store i32 %30, ptr %31, align 4
@@ -127,7 +127,7 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %27 = load i32, ptr %26, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %29 = load i32, ptr %28, align 4 --> Dst: %29 = load i32, ptr %28, align 4
-; CHECK-NEXT:    da analyze - input [* * *]!
+; CHECK-NEXT:    da analyze - input [S * *]!
 ; CHECK-NEXT:  Src: %29 = load i32, ptr %28, align 4 --> Dst: store i32 %30, ptr %31, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %29 = load i32, ptr %28, align 4 --> Dst: %32 = load i32, ptr %6, align 4
@@ -135,9 +135,9 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Src: %29 = load i32, ptr %28, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %30, ptr %31, align 4 --> Dst: store i32 %30, ptr %31, align 4
-; CHECK-NEXT:    da analyze - output [* * *]!
+; CHECK-NEXT:    da analyze - output [S * *]!
 ; CHECK-NEXT:  Src: store i32 %30, ptr %31, align 4 --> Dst: %32 = load i32, ptr %6, align 4
-; CHECK-NEXT:    da analyze - flow [*|<]!
+; CHECK-NEXT:    da analyze - flow [S|<]!
 ; CHECK-NEXT:  Src: store i32 %30, ptr %31, align 4 --> Dst: store i32 %33, ptr @out_l, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %32 = load i32, ptr %6, align 4 --> Dst: %32 = load i32, ptr %6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
index 3df7e35b4f16c..077d5bde44eac 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
@@ -148,7 +148,7 @@ end:
 define void @multidim_accesses(ptr %A) {
 ; CHECK-LABEL: 'multidim_accesses'
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4
-; CHECK-NEXT:    da analyze - output [0 0 *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
 ; CHECK-NEXT:    da analyze - output [* * *|<]!
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
index 491a309193258..a3c69270cf4f6 100644
--- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -61,7 +61,7 @@ define void @coupled_miv_type_mismatch(i32 %n) {
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4
 ; CHECK-NEXT:    da analyze - anti [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %add6, ptr %arrayidx10, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4
-; CHECK-NEXT:    da analyze - output [0 *]!
+; CHECK-NEXT:    da analyze - none!
 ;
 entry:
   br label %for.cond
diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
index 1d8475f43ec6c..75d13aa82faac 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -16,7 +16,7 @@ define void @prop0(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* <>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
@@ -200,7 +200,7 @@ define void @prop3(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %conv, ptr %arrayidx4, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [<> *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
@@ -258,7 +258,7 @@ define void @prop4(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %conv, ptr %arrayidx6, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx10, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* <>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx10, align 4 --> Dst: %0 = load i32, ptr %arrayidx10, align 4
@@ -377,7 +377,7 @@ define void @prop6(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %conv, ptr %arrayidx6, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx11, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* <>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx11, align 4 --> Dst: %0 = load i32, ptr %arrayidx11, align 4
@@ -435,13 +435,13 @@ for.end14:                                        ; preds = %for.inc12
 define void @prop7(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'prop7'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %conv, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - output [0 *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* -38]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4
-; CHECK-NEXT:    da analyze - input [0 *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
@@ -499,11 +499,11 @@ define void @prop8(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %conv, ptr %arrayidx4, align 4
 ; CHECK-NEXT:    da analyze - output [S 0]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* <>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - input [0 *]!
+; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
@@ -557,7 +557,7 @@ define void @prop9(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %conv, ptr %arrayidx7, align 4
 ; CHECK-NEXT:    da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - flow [* *|<]!
+; CHECK-NEXT:    da analyze - flow [* <>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll b/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll
index 55ee04388515b..3ce2fc37c3b48 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll
@@ -308,7 +308,7 @@ define void @clamped_small_bound(ptr %a) {
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx:
 ; CHECK-NEXT:        ((4 * (zext i1 {false,+,true}<%loop> to i64))<nuw><nsw> + %a)<nuw>
-; CHECK-NEXT:        --> {%a,+,-4}<%loop>
+; CHECK-NEXT:        --> {%a,+,-4}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -1040,7 +1040,7 @@ define void @clamped_mul_huge_scale_as1(ptr addrspace(1) %a) {
 ; CHECK-LABEL: 'clamped_mul_huge_scale_as1'
 ; CHECK-NEXT:    loop:
 ; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT:    Unsafe indirect dependence.
+; CHECK-NEXT:  Unsafe indirect dependence.
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:        IndirectUnsafe:
 ; CHECK-NEXT:            %ld = load i64, ptr addrspace(1) %gep, align 8 ->
@@ -1054,7 +1054,7 @@ define void @clamped_mul_huge_scale_as1(ptr addrspace(1) %a) {
 ; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:      [PSE]  %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off:
+; CHECK-NEXT:      [PSE] %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off:
 ; CHECK-NEXT:        ((36893488147419103232 * (zext i2 {0,+,1}<%loop> to i128))<nuw><nsw> + %a)<nuw>
 ; CHECK-NEXT:        --> {%a,+,36893488147419103232}<nw><%loop>
 ;
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
index fb4e91f38afbb..aeaf59c7aea44 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
@@ -168,8 +168,8 @@ exit:
 define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) {
 ; CHECK-LABEL: 'test_nusw_gep_with_load_user_outside_loop'
 ; CHECK-NEXT:    loop.header:
-; CHECK-NEXT:      Report: unsafe dependent memory operations in loop.
-; CHECK-NEXT:      Unknown data dependence.
+; CHECK-NEXT:      Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT:  Unknown data dependence.
 ; CHECK-NEXT:      Dependences:
 ; CHECK-NEXT:        Unknown:
 ; CHECK-NEXT:            store i32 0, ptr %A, align 4 ->
@@ -179,7 +179,7 @@ define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) {
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: (-392 + %A) High: (8 + %A))
-; CHECK-NEXT:            Member: {(4 + %A),+,-4}<%loop.header>
+; CHECK-NEXT:            Member: {(4 + %A),+,-4}<nw><%loop.header>
 ; CHECK-NEXT:            Member: %A
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
@@ -187,9 +187,9 @@ define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) {
 ; CHECK-NEXT:      {true,+,true}<%loop.header> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
-; CHECK-NEXT:      [PSE]  %gep = getelementptr nusw i32, ptr %A, i64 %and:
+; CHECK-NEXT:      [PSE] %gep = getelementptr nusw i32, ptr %A, i64 %and:
 ; CHECK-NEXT:        ((4 * (zext i1 {true,+,true}<%loop.header> to i64))<nuw><nsw> + %A)
-; CHECK-NEXT:        --> {(4 + %A),+,-4}<%loop.header>
+; CHECK-NEXT:        --> {(4 + %A),+,-4}<nw><%loop.header>
 ;
 entry:
   br label %loop.header
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll b/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll
index 168e9a867d217..2cd487f71f068 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll
@@ -18,10 +18,10 @@ define void @wrap_check_iv.3_implies_iv.2(i32 noundef %N, ptr %dst, ptr %src) {
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: %dst High: (4 + (12 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %dst))
-; CHECK-NEXT:            Member: {%dst,+,12}<%loop>
+; CHECK-NEXT:            Member: {%dst,+,12}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,8}<%loop>
+; CHECK-NEXT:            Member: {%src,+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -30,10 +30,10 @@ define void @wrap_check_iv.3_implies_iv.2(i32 noundef %N, ptr %dst, ptr %src) {
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,8}<%loop>
+; CHECK-NEXT:        --> {%src,+,8}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,3}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {%dst,+,12}<%loop>
+; CHECK-NEXT:        --> {%dst,+,12}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -73,10 +73,10 @@ define void @wrap_check_iv.3_implies_iv.2_different_start(i32 noundef %N, ptr %d
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: (12 + %dst) High: (16 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %dst))
-; CHECK-NEXT:            Member: {(12 + %dst),+,8}<%loop>
+; CHECK-NEXT:            Member: {(12 + %dst),+,8}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,8}<%loop>
+; CHECK-NEXT:            Member: {%src,+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -85,10 +85,10 @@ define void @wrap_check_iv.3_implies_iv.2_different_start(i32 noundef %N, ptr %d
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,8}<%loop>
+; CHECK-NEXT:        --> {%src,+,8}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3:
 ; CHECK-NEXT:        (4 + (4 * (sext i32 {2,+,2}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {(12 + %dst),+,8}<%loop>
+; CHECK-NEXT:        --> {(12 + %dst),+,8}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -128,10 +128,10 @@ define void @wrap_check_iv.3_implies_iv.2_predicates_added_in_different_order(i3
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: %dst High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %dst))
-; CHECK-NEXT:            Member: {%dst,+,8}<%loop>
+; CHECK-NEXT:            Member: {%dst,+,8}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (12 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,12}<%loop>
+; CHECK-NEXT:            Member: {%src,+,12}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -140,10 +140,10 @@ define void @wrap_check_iv.3_implies_iv.2_predicates_added_in_different_order(i3
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.3:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,3}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,12}<%loop>
+; CHECK-NEXT:        --> {%src,+,12}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,2}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {%dst,+,8}<%loop>
+; CHECK-NEXT:        --> {%dst,+,8}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -182,10 +182,10 @@ define void @wrap_check_iv.3_does_not_implies_iv.2_due_to_start(i32 noundef %N,
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: %dst High: (4 + (12 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %dst))
-; CHECK-NEXT:            Member: {%dst,+,12}<%loop>
+; CHECK-NEXT:            Member: {%dst,+,12}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: (40 + %src) High: (44 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {(40 + %src),+,8}<%loop>
+; CHECK-NEXT:            Member: {(40 + %src),+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -195,10 +195,10 @@ define void @wrap_check_iv.3_does_not_implies_iv.2_due_to_start(i32 noundef %N,
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {10,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {(40 + %src),+,8}<%loop>
+; CHECK-NEXT:        --> {(40 + %src),+,8}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,3}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {%dst,+,12}<%loop>
+; CHECK-NEXT:        --> {%dst,+,12}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -237,10 +237,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_start_negative(i32 nound
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: (-4 + %dst) High: ((12 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %dst))
-; CHECK-NEXT:            Member: {(-4 + %dst),+,12}<%loop>
+; CHECK-NEXT:            Member: {(-4 + %dst),+,12}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,8}<%loop>
+; CHECK-NEXT:            Member: {%src,+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -250,10 +250,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_start_negative(i32 nound
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,8}<%loop>
+; CHECK-NEXT:        --> {%src,+,8}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3:
 ; CHECK-NEXT:        ((4 * (sext i32 {-1,+,3}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {(-4 + %dst),+,12}<%loop>
+; CHECK-NEXT:        --> {(-4 + %dst),+,12}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -292,10 +292,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_negative_step(i32 nounde
 ; CHECK-NEXT:      Grouped accesses:
 ; CHECK-NEXT:        Group GRP0:
 ; CHECK-NEXT:          (Low: ((-4 * (zext i32 (-1 + %N) to i64))<nsw> + %dst) High: (4 + %dst))
-; CHECK-NEXT:            Member: {%dst,+,-4}<%loop>
+; CHECK-NEXT:            Member: {%dst,+,-4}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,8}<%loop>
+; CHECK-NEXT:            Member: {%src,+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -305,10 +305,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_negative_step(i32 nounde
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,8}<%loop>
+; CHECK-NEXT:        --> {%src,+,8}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3:
 ; CHECK-NEXT:        ((4 * (sext i32 {0,+,-1}<%loop> to i64))<nsw> + %dst)
-; CHECK-NEXT:        --> {%dst,+,-4}<%loop>
+; CHECK-NEXT:        --> {%dst,+,-4}<nw><%loop>
 ;
 entry:
   br label %loop
@@ -407,7 +407,7 @@ define void @narrower_i8_nssw_implies_wider_i32_nssw(ptr %dst, ptr %src, i32 %N)
 ; CHECK-NEXT:            Member: {%dst,+,4}<nw><%loop>
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64))<nuw><nsw> + %src))
-; CHECK-NEXT:            Member: {%src,+,8}<%loop>
+; CHECK-NEXT:            Member: {%src,+,8}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -419,7 +419,7 @@ define void @narrower_i8_nssw_implies_wider_i32_nssw(ptr %dst, ptr %src, i32 %N)
 ; CHECK-NEXT:        --> {%dst,+,4}<nw><%loop>
 ; CHECK-NEXT:      [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2:
 ; CHECK-NEXT:        ((4 * (sext i8 {0,+,2}<%loop> to i64))<nsw> + %src)
-; CHECK-NEXT:        --> {%src,+,8}<%loop>
+; CHECK-NEXT:        --> {%src,+,8}<nw><%loop>
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 1c48b0ed0f967..88cf23e412183 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -329,8 +329,8 @@ define double @single_iteration_unknown_stride(i32 %x, ptr %y, i1 %cond) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
-; CHECK-NEXT:        {(8 + %y),+,(8 * (sext i32 %x to i64))<nsw>}<%loop.body>
-; CHECK-NEXT:        --> {(8 + %y),+,8}<%loop.body>
+; CHECK-NEXT:        {(8 + %y),+,(8 * (sext i32 %x to i64))<nsw>}<nw><%loop.body>
+; CHECK-NEXT:        --> {(8 + %y),+,8}<nw><%loop.body>
 ;
 entry:
   br i1 %cond, label %noloop.exit, label %loop.ph
@@ -522,7 +522,7 @@ define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32
 ; CHECK-NEXT:            Member: %A
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
-; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
+; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -531,7 +531,7 @@ define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
 ; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
-; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
+; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<nw><%loop>
 ;
 entry:
   %N.ext = zext i16 %N to i32
@@ -573,7 +573,7 @@ define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
 ; CHECK-NEXT:            Member: %A
 ; CHECK-NEXT:        Group GRP1:
 ; CHECK-NEXT:          (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
-; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
+; CHECK-NEXT:            Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<nw><%loop>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:      SCEV assumptions:
@@ -582,7 +582,7 @@ define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
 ; CHECK-NEXT:      Expressions re-written:
 ; CHECK-NEXT:      [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
 ; CHECK-NEXT:        ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
-; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
+; CHECK-NEXT:        --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<nw><%loop>
 ;
 entry:
   %N.ext = sext i16 %N to i32
diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll b/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll
index fad5c3a144e17..81d7fbe2d0c9e 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll
@@ -20,7 +20,7 @@ define void @test(ptr %p) {
 ; CHECK-NEXT:    %iv2.ext = sext i32 %iv2 to i64
 ; CHECK-NEXT:    --> {(sext i32 %iv to i64),+,1}<nsw><%loop2> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: <<Unknown>> LoopDispositions: { %loop.header: Variant, %loop2: Computable, %loop3: Invariant }
 ; CHECK-NEXT:    %iv3 = phi i64 [ %iv2.ext, %loop2.end ], [ %iv3.next, %loop3 ]
-; CHECK-NEXT:    --> {{\{\{}}(sext i32 %iv to i64),+,1}<nsw><%loop2>,+,1}<nsw><%loop3> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: {(sext i32 %iv to i64),+,1}<nsw><%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant }
+; CHECK-NEXT:    --> {{\{\{}}(sext i32 %iv to i64),+,1}<nsw><%loop2>,+,1}<nuw><nsw><%loop3> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: {(sext i32 %iv to i64),+,1}<nsw><%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant }
 ; CHECK-NEXT:    %iv3.next = add nsw i64 %iv3, 1
 ; CHECK-NEXT:    --> {{\{\{}}(1 + (sext i32 %iv to i64))<nsw>,+,1}<nsw><%loop2>,+,1}<nsw><%loop3> U: [-2147483647,6442450944) S: [-2147483647,6442450944) Exits: {(1 + (sext i32 %iv to i64))<nsw>,+,1}<nsw><%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant }
 ; CHECK-NEXT:    %iv.next = trunc i64 %iv3 to i32
diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll
index 1964fca603e23..a5b7b166aaa97 100644
--- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll
@@ -77,7 +77,7 @@ define void @rewrite_preserve_add_nsw(i32 %a) {
 ; CHECK-NEXT:    %add = add nsw i32 %a, 4
 ; CHECK-NEXT:    --> (4 + %a)<nsw> U: [-2147483644,-2147483648) S: [-2147483644,-2147483648)
 ; CHECK-NEXT:    %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (4 + %a)<nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (4 + %a)<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,-2147483647) S: [1,-2147483647) Exits: (5 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @rewrite_preserve_add_nsw
diff --git a/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll b/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll
index a0ae9b63ac02a..d3e51bce99faa 100644
--- a/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll
+++ b/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll
@@ -13,11 +13,11 @@ define void @test(ptr %arg) {
 ; CHECK-NEXT:    %ptr2 = phi ptr [ %ptr2.next, %loop.latch ], [ null, %entry ]
 ; CHECK-NEXT:    --> %ptr2 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.header: Variant, %loop2.header: Invariant }
 ; CHECK-NEXT:    %ptr1.next = phi ptr [ %ptr2, %loop.header ], [ %ptr1.next.next, %loop2.latch ]
-; CHECK-NEXT:    --> {%ptr2,+,8}<nuw><%loop2.header> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant }
+; CHECK-NEXT:    --> {%ptr2,+,8}<nuw><nsw><%loop2.header> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant }
 ; CHECK-NEXT:    %iv = phi i64 [ 0, %loop.header ], [ %iv.next, %loop2.latch ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop2.header> U: [0,1) S: [0,1) Exits: <<Unknown>> LoopDispositions: { %loop2.header: Computable, %loop.header: Uniform }
 ; CHECK-NEXT:    %ptr1.dummy = getelementptr inbounds i64, ptr %ptr1.next, i64 0
-; CHECK-NEXT:    --> {%ptr2,+,8}<nuw><%loop2.header> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant }
+; CHECK-NEXT:    --> {%ptr2,+,8}<nuw><nsw><%loop2.header> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant }
 ; CHECK-NEXT:    %val = load i64, ptr %ptr1.dummy, align 8
 ; CHECK-NEXT:    --> %val U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop2.header: Variant, %loop.header: Variant }
 ; CHECK-NEXT:    %ptr1.next.next = getelementptr inbounds i64, ptr %ptr1.next, i64 1
diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
index 9f9dd6f3c11af..a13da89b4d6e9 100644
--- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
+++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
@@ -47,13 +47,13 @@ define void @test_00(i1 %arg) {
 ; CHECK-NEXT:    %s2 = add i32 %phi5, %phi2
 ; CHECK-NEXT:    --> {{\{\{}}57,+,2}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [57,668) S: [57,668) --> 667 U: [667,668) S: [667,668)
 ; CHECK-NEXT:    %s3 = add i32 %sum1, %sum3
-; CHECK-NEXT:    --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046)
+; CHECK-NEXT:    --> {{\{\{}}130,+,3}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046)
 ; CHECK-NEXT:    %s4 = add i32 %sum4, %sum2
-; CHECK-NEXT:    --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010)
+; CHECK-NEXT:    --> {{\{\{}}179,+,6}<nuw><nsw><%loop1>,+,6}<nw><%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010)
 ; CHECK-NEXT:    %s5 = add i32 %phi3, %sum3
-; CHECK-NEXT:    --> {{\{\{}}122,+,3}<nuw><nsw><%loop1>,+,3}<%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038)
+; CHECK-NEXT:    --> {{\{\{}}122,+,3}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038)
 ; CHECK-NEXT:    %s6 = add i32 %sum2, %phi6
-; CHECK-NEXT:    --> {{\{\{}}63,+,6}<%loop1>,+,3}<nw><%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471)
+; CHECK-NEXT:    --> {{\{\{}}63,+,6}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471)
 ; CHECK-NEXT:  Determining loop execution counts for: @test_00
 ; CHECK-NEXT:  Loop %loop2: backedge-taken count is i32 141
 ; CHECK-NEXT:  Loop %loop2: constant max backedge-taken count is i32 141
@@ -143,9 +143,9 @@ define void @test_01(i32 %a, i32 %b) {
 ; CHECK-NEXT:    %sum4 = add i32 %sum3, %phi6
 ; CHECK-NEXT:    --> {159,+,6}<%loop2> U: [159,1162) S: [159,1162) Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
 ; CHECK-NEXT:    %is2 = add i32 %sum4, %b
-; CHECK-NEXT:    --> {(159 + %b),+,6}<%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT:    --> {(159 + %b),+,6}<nw><%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable }
 ; CHECK-NEXT:    %ec2 = add i32 %is1, %is2
-; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + 
%b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<nw><%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + 
%b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
 ; CHECK-NEXT:    %s1 = add i32 %phi1, %is1
 ; CHECK-NEXT:    --> {(6 + (3 * %a) + %b),+,7}<%loop1> U: full-set S: full-set --> (6 + (3 * %a) + (7 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set
 ; CHECK-NEXT:    %s2 = add i32 %is2, %phi4
@@ -153,11 +153,11 @@ define void @test_01(i32 %a, i32 %b) {
 ; CHECK-NEXT:    %s3 = add i32 %is1, %phi5
 ; CHECK-NEXT:    --> {{\{\{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<nw><%loop2> U: full-set S: full-set --> (59 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))<nuw><nsw> + (2 * %a) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set
 ; CHECK-NEXT:    %s4 = add i32 %phi2, %is2
-; CHECK-NEXT:    --> {{\{\{}}(159 + (2 * %b)),+,2}<nw><%loop1>,+,6}<%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))<nuw><nsw> + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * 
%b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT:    --> {{\{\{}}(159 + (2 * %b)),+,2}<nw><%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))<nuw><nsw> + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 
* %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
 ; CHECK-NEXT:    %s5 = add i32 %is1, %is2
-; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + 
%b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
 ; CHECK-NEXT:    %s6 = add i32 %is2, %is1
-; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT:    --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + 
%b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
 ; CHECK-NEXT:  Determining loop execution counts for: @test_01
 ; CHECK-NEXT:  Loop %loop2: backedge-taken count is (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))
 ; CHECK-NEXT:  Loop %loop2: constant max backedge-taken count is i32 167
@@ -520,15 +520,15 @@ define void @test_06() {
 ; CHECK-NEXT:    %s1 = add i32 %phi1, %phi2
 ; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:    %s2 = add i32 %phi2, %phi1
-; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998)
+; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nuw><nsw><%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:    %s3 = add i32 %phi1, %phi3
 ; CHECK-NEXT:    --> {{\{\{}}40,+,1}<nuw><nsw><%loop1>,+,3}<nw><%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999)
 ; CHECK-NEXT:    %s4 = add i32 %phi3, %phi1
-; CHECK-NEXT:    --> {{\{\{}}40,+,1}<nuw><nsw><%loop1>,+,3}<nw><%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999)
+; CHECK-NEXT:    --> {{\{\{}}40,+,1}<nuw><nsw><%loop1>,+,3}<nuw><nsw><%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999)
 ; CHECK-NEXT:    %s5 = add i32 %phi2, %phi3
 ; CHECK-NEXT:    --> {{\{\{}}50,+,2}<nuw><nsw><%loop2>,+,3}<nw><%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:    %s6 = add i32 %phi3, %phi2
-; CHECK-NEXT:    --> {{\{\{}}50,+,2}<nuw><nsw><%loop2>,+,3}<nw><%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
+; CHECK-NEXT:    --> {{\{\{}}50,+,2}<nuw><nsw><%loop2>,+,3}<nuw><nsw><%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:  Determining loop execution counts for: @test_06
 ; CHECK-NEXT:  Loop %loop3: backedge-taken count is i32 323
 ; CHECK-NEXT:  Loop %loop3: constant max backedge-taken count is i32 323
@@ -598,15 +598,15 @@ define void @test_07() {
 ; CHECK-NEXT:    %s1 = add i32 %phi1, %phi2
 ; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009)
 ; CHECK-NEXT:    %s2 = add i32 %phi2, %phi1
-; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009)
+; CHECK-NEXT:    --> {{\{\{}}30,+,1}<nuw><nsw><%loop1>,+,2}<nuw><nsw><%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009)
 ; CHECK-NEXT:    %s3 = add i32 %phi1, %phi3
 ; CHECK-NEXT:    --> {{\{\{}}40,+,3}<nuw><nsw><%loop3>,+,1}<nw><%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010)
 ; CHECK-NEXT:    %s4 = add i32 %phi3, %phi1
-; CHECK-NEXT:    --> {{\{\{}}40,+,3}<nuw><nsw><%loop3>,+,1}<nw><%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010)
+; CHECK-NEXT:    --> {{\{\{}}40,+,3}<nuw><nsw><%loop3>,+,1}<nuw><nsw><%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010)
 ; CHECK-NEXT:    %s5 = add i32 %phi2, %phi3
 ; CHECK-NEXT:    --> {{\{\{}}50,+,3}<nuw><nsw><%loop3>,+,2}<nw><%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:    %s6 = add i32 %phi3, %phi2
-; CHECK-NEXT:    --> {{\{\{}}50,+,3}<nuw><nsw><%loop3>,+,2}<nw><%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
+; CHECK-NEXT:    --> {{\{\{}}50,+,3}<nuw><nsw><%loop3>,+,2}<nuw><nsw><%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998)
 ; CHECK-NEXT:  Determining loop execution counts for: @test_07
 ; CHECK-NEXT:  Loop %loop2: backedge-taken count is i32 489
 ; CHECK-NEXT:  Loop %loop2: constant max backedge-taken count is i32 489
@@ -684,7 +684,7 @@ define void @test_08() {
 ; CHECK-NEXT:    %tmp12 = trunc i64 %tmp11 to i32
 ; CHECK-NEXT:    --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant }
 ; CHECK-NEXT:    %tmp14 = mul i32 %tmp12, %tmp7
-; CHECK-NEXT:    --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>) U: full-set S: full-set --> (-2 * ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)) U: [0,-1) S: [-2147483648,2147483647) Exits: 0 LoopDispositions: { %loop_2: Variant }
+; CHECK-NEXT:    --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<nuw><nsw><%loop_2>) * {-1,+,-1}<nsw><%loop_1>) U: full-set S: full-set --> (-2 * ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<nuw><nsw><%loop_2>)) U: [0,-1) S: [-2147483648,2147483647) Exits: 0 LoopDispositions: { %loop_2: Variant }
 ; CHECK-NEXT:    %tmp16 = mul i64 %iv.2.1, %iv.1.1
 ; CHECK-NEXT:    --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1) U: full-set S: full-set --> (3 * %iv.2.1) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant }
 ; CHECK-NEXT:    %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
index 1e15d2d0d6461..3fde1dfb963c4 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
@@ -153,7 +153,6 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    exit count for latch: %N
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
@@ -161,18 +160,14 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated symbolic max exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    symbolic max exit count for latch: %N
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ;
 entry:
   br label %loop
@@ -198,7 +193,6 @@ define void @le_from_zero_no_nuw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    exit count for latch: %N
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
@@ -206,18 +200,14 @@ define void @le_from_zero_no_nuw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated symbolic max exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    symbolic max exit count for latch: %N
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ;
 entry:
   br label %loop
@@ -417,7 +407,7 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nssw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    exit count for latch: (-2147483648 + %N)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
@@ -425,18 +415,18 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated symbolic max exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nssw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    symbolic max exit count for latch: (-2147483648 + %N)
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nssw>
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4294967295
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nssw>
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nssw>
 ;
 entry:
   br label %loop
@@ -462,7 +452,7 @@ define void @le_from_int_min_no_nuw_nsw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated exit count for loop: (-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64))<nuw><nsw>))<nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    exit count for latch: (-2147483648 + %N)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
@@ -470,18 +460,18 @@ define void @le_from_int_min_no_nuw_nsw(i32 %M, i32 %N) {
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    predicated symbolic max exit count for loop: (-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64))<nuw><nsw>))<nsw>
 ; CHECK-NEXT:     Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nusw>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    symbolic max exit count for latch: (-2147483648 + %N)
 ; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64))<nuw><nsw>))<nsw> umin_seq (zext i32 (-2147483648 + %N) to i64))
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 2147483648
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nusw>
 ; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64))<nuw><nsw>))<nsw> umin_seq (zext i32 (-2147483648 + %N) to i64))
 ; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nusw>
+; CHECK-NEXT:      {-2147483648,+,1}<nsw><%loop> Added Flags: <nusw>
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll
index c63650aef8fe4..77642f0c3f1ee 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll
@@ -5,9 +5,9 @@ define i32 @logical_and_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: 'logical_and_2ops'
 ; CHECK-NEXT:  Classifying expressions for: @logical_and_2ops
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_2ops
@@ -33,9 +33,9 @@ define i32 @logical_or_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: 'logical_or_2ops'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_2ops
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 true, i1 %cond_p1
 ; CHECK-NEXT:    --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_or_2ops
@@ -61,9 +61,9 @@ define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_and_3ops'
 ; CHECK-NEXT:  Classifying expressions for: @logical_and_3ops
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond_p3 = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond = select i1 %cond_p3, i1 %cond_p2, i1 false
@@ -93,9 +93,9 @@ define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_or_3ops'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1
 ; CHECK-NEXT:    --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond = select i1 %cond_p3, i1 true, i1 %cond_p2
@@ -125,9 +125,9 @@ define i32 @logical_or_3ops_duplicate(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_or_3ops_duplicate'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops_duplicate
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond_p4 = select i1 %cond_p0, i1 true, i1 %cond_p1
 ; CHECK-NEXT:    --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond_p5 = select i1 %cond_p4, i1 true, i1 %cond_p2
@@ -161,9 +161,9 @@ define i32 @logical_or_3ops_redundant_uminseq_operand(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_or_3ops_redundant_uminseq_operand'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops_redundant_uminseq_operand
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m)
 ; CHECK-NEXT:    --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1
@@ -196,9 +196,9 @@ define i32 @logical_or_3ops_redundant_umin_operand(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_or_3ops_redundant_umin_operand'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops_redundant_umin_operand
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %k umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %k umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %k umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %k umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m)
 ; CHECK-NEXT:    --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1
@@ -231,9 +231,9 @@ define i32 @logical_or_4ops_redundant_operand_across_umins(i32 %n, i32 %m, i32 %
 ; CHECK-LABEL: 'logical_or_4ops_redundant_operand_across_umins'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_4ops_redundant_operand_across_umins
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k umin_seq %q) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k umin_seq %q) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k umin_seq %q)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k umin_seq %q)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m)
 ; CHECK-NEXT:    --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %umin2 = call i32 @llvm.umin.i32(i32 %n, i32 %q)
@@ -269,9 +269,9 @@ define i32 @logical_or_3ops_operand_wise_redundant_umin(i32 %n, i32 %m, i32 %k)
 ; CHECK-LABEL: 'logical_or_3ops_operand_wise_redundant_umin'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops_operand_wise_redundant_umin
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m)
 ; CHECK-NEXT:    --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %umin2 = call i32 @llvm.umin.i32(i32 %n, i32 %k)
@@ -307,9 +307,9 @@ define i32 @logical_or_3ops_partially_redundant_umin(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: 'logical_or_3ops_partially_redundant_umin'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_3ops_partially_redundant_umin
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%m umin %k)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq (%m umin %k)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%m umin %k))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%m umin %k))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m)
 ; CHECK-NEXT:    --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %umin2 = call i32 @llvm.umin.i32(i32 %umin, i32 %k)
@@ -341,21 +341,21 @@ define i32 @logical_or_5ops_redundant_opearand_of_inner_uminseq(i32 %a, i32 %b,
 ; CHECK-LABEL: 'logical_or_5ops_redundant_opearand_of_inner_uminseq'
 ; CHECK-NEXT:  Classifying expressions for: @logical_or_5ops_redundant_opearand_of_inner_uminseq
 ; CHECK-NEXT:    %first.i = phi i32 [ 0, %entry ], [ %first.i.next, %first.loop ]
-; CHECK-NEXT:    --> {0,+,1}<%first.loop> U: full-set S: full-set Exits: (%e umin_seq %d umin_seq %a) LoopDispositions: { %first.loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%first.loop> U: full-set S: full-set Exits: (%e umin_seq %d umin_seq %a) LoopDispositions: { %first.loop: Computable }
 ; CHECK-NEXT:    %first.i.next = add i32 %first.i, 1
-; CHECK-NEXT:    --> {1,+,1}<%first.loop> U: full-set S: full-set Exits: (1 + (%e umin_seq %d umin_seq %a)) LoopDispositions: { %first.loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%first.loop> U: full-set S: full-set Exits: (1 + (%e umin_seq %d umin_seq %a)) LoopDispositions: { %first.loop: Computable }
 ; CHECK-NEXT:    %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1
 ; CHECK-NEXT:    --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %first.loop: Variant }
 ; CHECK-NEXT:    %cond_p4 = select i1 %cond_p3, i1 true, i1 %cond_p2
 ; CHECK-NEXT:    --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1) umin_seq (true + %cond_p2))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %first.loop: Variant }
 ; CHECK-NEXT:    %i = phi i32 [ 0, %first.loop.exit ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = call i32 @llvm.umin.i32(i32 %c, i32 %d)
 ; CHECK-NEXT:    --> (%c umin %d) U: full-set S: full-set Exits: (%c umin %d) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %umin2 = call i32 @llvm.umin.i32(i32 %umin, i32 %first.i)
-; CHECK-NEXT:    --> ({0,+,1}<%first.loop> umin %c umin %d) U: full-set S: full-set --> ((%e umin_seq %d umin_seq %a) umin %c umin %d) U: full-set S: full-set Exits: ((%e umin_seq %d umin_seq %a) umin %c umin %d) LoopDispositions: { %loop: Invariant }
+; CHECK-NEXT:    --> ({0,+,1}<nuw><%first.loop> umin %c umin %d) U: full-set S: full-set --> ((%e umin_seq %d umin_seq %a) umin %c umin %d) U: full-set S: full-set Exits: ((%e umin_seq %d umin_seq %a) umin %c umin %d) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:    %cond_p8 = select i1 %cond_p5, i1 true, i1 %cond_p6
 ; CHECK-NEXT:    --> (true + ((true + %cond_p5) umin_seq (true + %cond_p6))) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond = select i1 %cond_p8, i1 true, i1 %cond_p7
@@ -501,15 +501,15 @@ define i64 @uminseq_vs_ptrtoint_complexity(i64 %n, i64 %m, ptr %ptr) {
 ; CHECK-LABEL: 'uminseq_vs_ptrtoint_complexity'
 ; CHECK-NEXT:  Classifying expressions for: @uminseq_vs_ptrtoint_complexity
 ; CHECK-NEXT:    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i64 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %ptr.int = ptrtoint ptr %ptr to i64
 ; CHECK-NEXT:    --> (ptrtoint ptr %ptr to i64) U: full-set S: full-set
 ; CHECK-NEXT:    %r = add i64 %i, %ptr.int
-; CHECK-NEXT:    --> {(ptrtoint ptr %ptr to i64),+,1}<%loop> U: full-set S: full-set --> ((%n umin_seq %m) + (ptrtoint ptr %ptr to i64)) U: full-set S: full-set
+; CHECK-NEXT:    --> {(ptrtoint ptr %ptr to i64),+,1}<nw><%loop> U: full-set S: full-set --> ((%n umin_seq %m) + (ptrtoint ptr %ptr to i64)) U: full-set S: full-set
 ; CHECK-NEXT:  Determining loop execution counts for: @uminseq_vs_ptrtoint_complexity
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is (%n umin_seq %m)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 -1
@@ -537,9 +537,9 @@ define i32 @logical_and_implies_poison1(i32 %n) {
 ; CHECK-NEXT:    %add = add i32 %n, 1
 ; CHECK-NEXT:    --> (1 + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison1
@@ -568,9 +568,9 @@ define i32 @logical_and_implies_poison2(i32 %n) {
 ; CHECK-NEXT:    %add = add i32 %n, 1
 ; CHECK-NEXT:    --> (1 + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p1 umin %cond_p0) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison2
@@ -599,9 +599,9 @@ define i32 @logical_and_implies_poison3(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add = add i32 %n, %m
 ; CHECK-NEXT:    --> (%n + %m) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin %n) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n + %m) umin %n) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin %n)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin %n)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p1 umin %cond_p0) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison3
@@ -630,9 +630,9 @@ define i32 @logical_and_implies_poison_wrong_direction(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add = add i32 %n, %m
 ; CHECK-NEXT:    --> (%n + %m) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%n + %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq (%n + %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%n + %m))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%n + %m))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_wrong_direction
@@ -659,9 +659,9 @@ define i32 @logical_and_implies_poison_noundef(i32 %n, i32 noundef %m) {
 ; CHECK-LABEL: 'logical_and_implies_poison_noundef'
 ; CHECK-NEXT:  Classifying expressions for: @logical_and_implies_poison_noundef
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_noundef
@@ -687,9 +687,9 @@ define i32 @logical_and_implies_poison_noundef_wrong_direction(i32 %n, i32 nound
 ; CHECK-LABEL: 'logical_and_implies_poison_noundef_wrong_direction'
 ; CHECK-NEXT:  Classifying expressions for: @logical_and_implies_poison_noundef_wrong_direction
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq %n) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%m umin_seq %n) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq %n)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq %n)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_noundef_wrong_direction
@@ -719,9 +719,9 @@ define i32 @logical_and_implies_poison_complex1(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add1 = add i32 %add, 1
 ; CHECK-NEXT:    --> (1 + %n + %m) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (1 + %n + %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n + %m) umin (1 + %n + %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (1 + %n + %m))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (1 + %n + %m))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_complex1
@@ -753,9 +753,9 @@ define i32 @logical_and_implies_poison_complex2(i32 %n, i32 %m, i32 %l) {
 ; CHECK-NEXT:    %add1 = add i32 %add, %l
 ; CHECK-NEXT:    --> (%n + %m + %l) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (%n + %m + %l)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n + %m) umin (%n + %m + %l)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (%n + %m + %l))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (%n + %m + %l))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_complex2
@@ -787,9 +787,9 @@ define i32 @logical_and_implies_poison_complex_wrong_direction(i32 %n, i32 %m, i
 ; CHECK-NEXT:    %add1 = add i32 %add, %l
 ; CHECK-NEXT:    --> (%n + %m + %l) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin_seq (%n + %m + %l)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: ((%n + %m) umin_seq (%n + %m + %l)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin_seq (%n + %m + %l))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin_seq (%n + %m + %l))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_implies_poison_complex_wrong_direction
@@ -819,9 +819,9 @@ define i32 @logical_and_implies_multiple_ops(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add = add i32 %n, 1
 ; CHECK-NEXT:    --> (1 + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (((1 + %n) umin %n) umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (((1 + %n) umin %n) umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (((1 + %n) umin %n) umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (((1 + %n) umin %n) umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond2 = select i1 %cond, i1 %cond_p2, i1 false
@@ -854,9 +854,9 @@ define i32 @logical_and_implies_multiple_ops2(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add = add i32 %n, 1
 ; CHECK-NEXT:    --> (1 + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq ((1 + %n) umin %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq ((1 + %n) umin %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq ((1 + %n) umin %m))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq ((1 + %n) umin %m))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond2 = select i1 %cond, i1 %cond_p2, i1 false
@@ -889,9 +889,9 @@ define i32 @logical_and_implies_multiple_ops3(i32 %n, i32 %m) {
 ; CHECK-NEXT:    %add = add i32 %n, 1
 ; CHECK-NEXT:    --> (1 + %n) U: full-set S: full-set
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%m umin_seq ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq ((1 + %n) umin %n))) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq ((1 + %n) umin %n))) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %cond2 = select i1 %cond, i1 %cond_p2, i1 false
@@ -990,9 +990,9 @@ define i32 @logical_and_not_zero_needs_context(i32 %n, i32 %m) {
 ; CHECK-LABEL: 'logical_and_not_zero_needs_context'
 ; CHECK-NEXT:  Classifying expressions for: @logical_and_not_zero_needs_context
 ; CHECK-NEXT:    %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %i.next = add i32 %i, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %cond = select i1 %cond_p0, i1 %cond_p1, i1 false
 ; CHECK-NEXT:    --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:  Determining loop execution counts for: @logical_and_not_zero_needs_context
diff --git a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
index f2de5e32f082e..b00c26d472eaf 100644
--- a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
@@ -17,11 +17,11 @@ define dso_local i32 @f() {
 ; CHECK-NEXT:    %storemerge23 = phi i32 [ 3, %entry ], [ %dec16, %for.inc13.3 ]
 ; CHECK-NEXT:    --> {3,+,-1}<nsw><%outer.loop> U: [1,4) S: [1,4) Exits: <<Unknown>> LoopDispositions: { %outer.loop: Computable, %for.cond6: Invariant, %inner.loop: Invariant }
 ; CHECK-NEXT:    %storemerge1921 = phi i32 [ 3, %outer.loop ], [ %dec, %for.end ]
-; CHECK-NEXT:    --> {3,+,-1}<nsw><%for.cond6> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> {3,+,-1}<nuw><nsw><%for.cond6> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %idxprom20 = zext i32 %storemerge1921 to i64
-; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nsw><%for.cond6> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nuw><nsw><%for.cond6> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20
-; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%for.cond6> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nuw><nsw><%for.cond6> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %i = load i16, ptr %arrayidx7, align 2
 ; CHECK-NEXT:    --> %i U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ]
@@ -41,11 +41,11 @@ define dso_local i32 @f() {
 ; CHECK-NEXT:    %retval.0 = phi i32 [ %i1, %if.end ], [ 0, %cleanup.loopexit ]
 ; CHECK-NEXT:    --> %retval.0 U: full-set S: full-set
 ; CHECK-NEXT:    %storemerge1921.3 = phi i32 [ 3, %for.end ], [ %dec.3, %for.end.3 ]
-; CHECK-NEXT:    --> {3,+,-1}<nsw><%inner.loop> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> {3,+,-1}<nuw><nsw><%inner.loop> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %idxprom20.3 = zext i32 %storemerge1921.3 to i64
-; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nsw><%inner.loop> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nuw><nsw><%inner.loop> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20.3
-; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%inner.loop> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
+; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nuw><nsw><%inner.loop> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %i7 = load i16, ptr %arrayidx7.3, align 2
 ; CHECK-NEXT:    --> %i7 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %i8 = load volatile i32, ptr @b, align 4
@@ -53,7 +53,7 @@ define dso_local i32 @f() {
 ; CHECK-NEXT:    %dec.3 = add nsw i32 %storemerge1921.3, -1
 ; CHECK-NEXT:    --> {2,+,-1}<nsw><%inner.loop> U: [2,3) S: [2,3) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform }
 ; CHECK-NEXT:    %storemerge1921.lcssa25.3 = phi i32 [ %storemerge1921.3, %for.end.3 ]
-; CHECK-NEXT:    --> {3,+,-1}<nsw><%inner.loop> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %outer.loop: Uniform, %for.cond6: Variant, %inner.loop: Computable }
+; CHECK-NEXT:    --> {3,+,-1}<nuw><nsw><%inner.loop> U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %outer.loop: Uniform, %for.cond6: Variant, %inner.loop: Computable }
 ; CHECK-NEXT:    %dec16 = add nsw i32 %storemerge23, -1
 ; CHECK-NEXT:    --> {2,+,-1}<nsw><%outer.loop> U: [0,3) S: [0,3) Exits: <<Unknown>> LoopDispositions: { %outer.loop: Computable, %for.cond6: Invariant, %inner.loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @f
diff --git a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll
index 2264bfe4fce6c..0caa0a3f0f15c 100644
--- a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll
+++ b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll
@@ -175,7 +175,7 @@ define void @f3(i1 %c) {
 ; CHECK-NEXT:    %loop.iv = phi i16 [ 0, %entry ], [ %loop.iv.inc, %loop ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,128) S: [0,128) Exits: 127 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv = phi i16 [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,%step}<nuw><%loop> U: [0,-892) S: [0,-892) Exits: ((127 * %step)<nuw> + %start) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,%step}<%loop> U: [0,-892) S: [0,-892) Exits: ((127 * %step)<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.zext = zext i16 %iv to i64
 ; CHECK-NEXT:    --> {(zext i16 %start to i64),+,(zext i16 %step to i64)}<nuw><%loop> U: [0,64644) S: [0,64644) Exits: ((zext i16 %start to i64) + (127 * (zext i16 %step to i64))<nuw><nsw>) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i16 %iv, %step
@@ -358,9 +358,9 @@ define void @f7(i1 %c) {
 ; CHECK-NEXT:    %iv.next = add i32 %iv, %step
 ; CHECK-NEXT:    --> {(%step + %start),+,%step}<nw><%loop> U: [-256,256) S: [-256,256) Exits: ((128 * %step)<nsw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.trunc.plus.one = add i16 %iv.trunc, 1
-; CHECK-NEXT:    --> {(1 + (trunc i32 %start to i16))<nuw><nsw>,+,(trunc i32 %step to i16)}<%loop> U: [1,129) S: [1,129) Exits: (1 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(1 + (trunc i32 %start to i16))<nuw><nsw>,+,(trunc i32 %step to i16)}<nsw><%loop> U: [1,129) S: [1,129) Exits: (1 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.trunc.plus.two = add i16 %iv.trunc, 2
-; CHECK-NEXT:    --> {(2 + (trunc i32 %start to i16))<nuw><nsw>,+,(trunc i32 %step to i16)}<%loop> U: [2,130) S: [2,130) Exits: (2 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(2 + (trunc i32 %start to i16))<nuw><nsw>,+,(trunc i32 %step to i16)}<nsw><%loop> U: [2,130) S: [2,130) Exits: (2 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))<nsw>) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %loop.iv.inc = add i16 %loop.iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,129) S: [1,129) Exits: 128 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @f7
diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
index 87697da20a30b..e0016f3055484 100644
--- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
+++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll
@@ -115,7 +115,7 @@ exit:
 define void @test_trunc(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
 ; CHECK-LABEL: @test_trunc
 ; CHECK:          %trunc2 = trunc i64 %iv2.inc to i32
-; CHECK-NEXT:     -->  {(trunc i64 (1 + {7,+,1}<%loop>)<nuw><nsw> to i32),+,1}<%loop2> U: [8,53) S: [8,53)  -->  52 U: [52,53) S: [52,53)
+; CHECK-NEXT:     -->  {(trunc i64 (1 + {7,+,1}<nuw><nsw><%loop>)<nuw><nsw> to i32),+,1}<%loop2> U: [8,53) S: [8,53)  -->  52 U: [52,53) S: [52,53)
 entry:
   br label %loop
 
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll
index 17a6b706685c4..512c2a9e1f132 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll
@@ -218,7 +218,7 @@ define void @const_max_btc_32_or_order_1(i64 %n) {
 ; CHECK-NEXT:    %and.pre = and i1 %pre.1, %pre.0
 ; CHECK-NEXT:    --> (%pre.1 umin %pre.0) U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ]
-; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><%loop> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: (1 + %n) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @const_max_btc_32_or_order_1
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
index f0e732968c88a..fac1e2a9e6986 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
@@ -808,9 +808,9 @@ define void @rewrite_add_rec() {
 ; CHECK-NEXT:    %n.vec = and i64 %sub, -2
 ; CHECK-NEXT:    --> (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw> U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
 ; CHECK-NEXT:    %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ]
-; CHECK-NEXT:    --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> LoopDispositions: { %inner: Computable, %outer.header: Uniform }
+; CHECK-NEXT:    --> {0,+,2}<nuw><%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw>)<nsw> LoopDispositions: { %inner: Computable, %outer.header: Uniform }
 ; CHECK-NEXT:    %inner.iv.next = add i64 %inner.iv, 2
-; CHECK-NEXT:    --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw> LoopDispositions: { %inner: Computable, %outer.header: Uniform }
+; CHECK-NEXT:    --> {2,+,2}<nw><%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<nsw><%outer.header> /u 2))<nuw><nsw> LoopDispositions: { %inner: Computable, %outer.header: Uniform }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @rewrite_add_rec
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 436f0e55840d9..bab3a28d1e8a4 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1334,7 +1334,7 @@ define i32 @ptr_induction_ult_1(ptr %a, ptr %b) {
 ; CHECK-LABEL: 'ptr_induction_ult_1'
 ; CHECK-NEXT:  Classifying expressions for: @ptr_induction_ult_1
 ; CHECK-NEXT:    %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ]
-; CHECK-NEXT:    --> {%a,+,4}<nw><%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nuw><nsw><%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %ptr.iv.next = getelementptr i32, ptr %ptr.iv, i64 1
 ; CHECK-NEXT:    --> {(4 + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @ptr_induction_ult_1
diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
index 9e7142adb1ae8..e5972e021aeef 100644
--- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
@@ -163,9 +163,9 @@ define void @btc_depends_on_div_mul(i64 %x) {
 ; CHECK-NEXT:    %masked = and i64 %div.16, 1152921504606846974
 ; CHECK-NEXT:    --> (2 * ((2 * %x) /u 32))<nuw><nsw> U: [0,1152921504606846975) S: [0,1152921504606846975)
 ; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ((2 * %x) /u 32))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,2}<nuw><%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ((2 * %x) /u 32))<nuw><nsw>)<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 2
-; CHECK-NEXT:    --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((2 * %x) /u 32))<nuw><nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {2,+,2}<nw><%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((2 * %x) /u 32))<nuw><nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @btc_depends_on_div_mul
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-2 + (2 * ((2 * %x) /u 32))<nuw><nsw>)<nsw> /u 2)
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 9223372036854775807
diff --git a/llvm/test/Analysis/ScalarEvolution/pr123550.ll b/llvm/test/Analysis/ScalarEvolution/pr123550.ll
index 196f03cad51cd..81bd58bf2ae36 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr123550.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr123550.ll
@@ -12,13 +12,13 @@ define i32 @test() {
 ; CHECK-NEXT:    %srem = srem i32 729259140, %phi
 ; CHECK-NEXT:    --> %srem U: [0,1073741824) S: [0,1073741824) Exits: 130 LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %trunc = trunc i32 %iv2 to i8
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nuw><nsw><%loop> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %urem = urem i8 -83, %trunc
-; CHECK-NEXT:    --> (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> (-83 + ((-83 /u {1,+,1}<nuw><nsw><%loop>) * {-1,+,-1}<nuw><nsw><%loop>)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %zext = zext i8 %urem to i32
-; CHECK-NEXT:    --> (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> (zext i8 (-83 + ((-83 /u {1,+,1}<nuw><nsw><%loop>) * {-1,+,-1}<nuw><nsw><%loop>)) to i32) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %sub = sub i32 0, %zext
-; CHECK-NEXT:    --> (-1 * (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32))<nuw><nsw> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> (-1 * (zext i8 (-83 + ((-83 /u {1,+,1}<nuw><nsw><%loop>) * {-1,+,-1}<nuw><nsw><%loop>)) to i32))<nuw><nsw> U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv2.inc = add i32 %iv2, 1
 ; CHECK-NEXT:    --> {2,+,1}<nuw><nsw><%loop> U: [2,3) S: [2,3) Exits: 2 LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %srem.lcssa = phi i32 [ %srem, %loop ]
diff --git a/llvm/test/Analysis/ScalarEvolution/pr22641.ll b/llvm/test/Analysis/ScalarEvolution/pr22641.ll
index fe06973747d90..be79f2971e28c 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr22641.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr22641.ll
@@ -11,7 +11,7 @@ body:
   %conv2 = zext i16 %dec2 to i32
   %conv = zext i16 %dec to i32
 ; CHECK:   %conv = zext i16 %dec to i32
-; CHECK-NEXT: -->  {(zext i16 (-1 + %a) to i32),+,65535}<nuw><%body>
+; CHECK-NEXT: -->  {(zext i16 (-1 + %a) to i32),+,65535}<nuw><nsw><%body>
 ; CHECK-NOT:  -->  {(65535 + (zext i16 %a to i32)),+,65535}<nuw><%body>
 
   br label %cond
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index 0c1f37bf58601..2b07c944eb1d1 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -410,7 +410,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
 ; X32-NEXT:    %i10 = sub i64 %i9, %i4
 ; X32-NEXT:    --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
-; X32-NEXT:    --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
+; X32-NEXT:    --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i12 = load i8, ptr %i11, align 1
 ; X32-NEXT:    --> %i12 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i13 = add i8 %i12, %i8
@@ -487,7 +487,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
 ; X32-NEXT:    %i10 = sub i64 %i9, %i4
 ; X32-NEXT:    --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
-; X32-NEXT:    --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
+; X32-NEXT:    --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
 ; X32-NEXT:    %i12 = load i8, ptr %i11, align 1
 ; X32-NEXT:    --> %i12 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i13 = add i8 %i12, %i8
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
index f372847d2e398..572b3f7271b88 100644
--- a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
@@ -1,9 +1,9 @@
 ; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
 
 ; CHECK: %tmp3 = sext i8 %tmp2 to i32
-; CHECK: -->  (sext i8 {0,+,1}<%bb1> to i32) U: [-128,128) S: [-128,128)               Exits: -1
+; CHECK: -->  (sext i8 {0,+,1}<nuw><%bb1> to i32) U: [-128,128) S: [-128,128)               Exits: -1
 ; CHECK: %tmp4 = mul i32 %tmp3, %i.02
-; CHECK: -->  ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<nuw><nsw><%bb>) U: [-3968,3938) S: [-3968,3938)                Exits: {0,+,-1}<nsw><%bb>
+; CHECK: -->  ((sext i8 {0,+,1}<nuw><%bb1> to i32) * {0,+,1}<nuw><nsw><%bb>) U: [-3968,3938) S: [-3968,3938)                Exits: {0,+,-1}<nsw><%bb>
 
 ; These sexts are not foldable.
 
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll
index f369bfe0b6312..5d6a1dec72e13 100644
--- a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll
@@ -11,17 +11,17 @@ define void @foo(ptr nocapture %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT:    %tmp9 = shl i64 %tmp8, 33
 ; CHECK-NEXT:    --> {0,+,8589934592}<nuw><%bb7> U: [0,-17179869183) S: [-9223372036854775808,9223372028264841217) Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) LoopDispositions: { %bb7: Computable }
 ; CHECK-NEXT:    %tmp10 = ashr exact i64 %tmp9, 32
-; CHECK-NEXT:    --> (sext i32 {0,+,2}<%bb7> to i64) U: [0,-1) S: [-2147483648,2147483647) Exits: (sext i32 (-2 + (2 * %arg2)) to i64) LoopDispositions: { %bb7: Computable }
+; CHECK-NEXT:    --> (sext i32 {0,+,2}<nuw><%bb7> to i64) U: [0,-1) S: [-2147483648,2147483647) Exits: (sext i32 (-2 + (2 * %arg2)) to i64) LoopDispositions: { %bb7: Computable }
 ; CHECK-NEXT:    %tmp11 = getelementptr inbounds i32, ptr %arg, i64 %tmp10
-; CHECK-NEXT:    --> ((4 * (sext i32 {0,+,2}<%bb7> to i64))<nsw> + %arg) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %arg2)) to i64))<nsw> + %arg) LoopDispositions: { %bb7: Computable }
+; CHECK-NEXT:    --> ((4 * (sext i32 {0,+,2}<nuw><%bb7> to i64))<nsw> + %arg) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %arg2)) to i64))<nsw> + %arg) LoopDispositions: { %bb7: Computable }
 ; CHECK-NEXT:    %tmp12 = load i32, ptr %tmp11, align 4
 ; CHECK-NEXT:    --> %tmp12 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb7: Variant }
 ; CHECK-NEXT:    %tmp13 = sub nsw i32 %tmp12, %arg1
 ; CHECK-NEXT:    --> ((-1 * %arg1) + %tmp12) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb7: Variant }
 ; CHECK-NEXT:    %tmp14 = or disjoint i64 %tmp10, 1
-; CHECK-NEXT:    --> (1 + (sext i32 {0,+,2}<%bb7> to i64))<nuw><nsw> U: [1,0) S: [-2147483647,2147483648) Exits: (1 + (sext i32 (-2 + (2 * %arg2)) to i64))<nuw><nsw> LoopDispositions: { %bb7: Computable }
+; CHECK-NEXT:    --> (1 + (sext i32 {0,+,2}<nuw><%bb7> to i64))<nuw><nsw> U: [1,0) S: [-2147483647,2147483648) Exits: (1 + (sext i32 (-2 + (2 * %arg2)) to i64))<nuw><nsw> LoopDispositions: { %bb7: Computable }
 ; CHECK-NEXT:    %tmp15 = getelementptr inbounds i32, ptr %arg, i64 %tmp14
-; CHECK-NEXT:    --> (4 + (4 * (sext i32 {0,+,2}<%bb7> to i64))<nsw> + %arg) U: full-set S: full-set Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64))<nsw> + %arg) LoopDispositions: { %bb7: Computable }
+; CHECK-NEXT:    --> (4 + (4 * (sext i32 {0,+,2}<nuw><%bb7> to i64))<nsw> + %arg) U: full-set S: full-set Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64))<nsw> + %arg) LoopDispositions: { %bb7: Computable }
 ; CHECK-NEXT:    %tmp16 = load i32, ptr %tmp15, align 4
 ; CHECK-NEXT:    --> %tmp16 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb7: Variant }
 ; CHECK-NEXT:    %tmp17 = mul nsw i32 %tmp16, %arg1
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll
index 5a3517961e1ac..cc8fbf961462e 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll
@@ -562,9 +562,9 @@ declare void @llvm.assume(i1)
 define void @step_is_neg_addrec_slt_8(i64 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_slt_8'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_slt_8
-; CHECK-NEXT:  Loop %inner: backedge-taken count is (7 /u {0,+,-1}<%outer.header>)
+; CHECK-NEXT:  Loop %inner: backedge-taken count is (7 /u {0,+,-1}<nuw><nsw><%outer.header>)
 ; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is i32 8
-; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is (7 /u {0,+,-1}<%outer.header>)
+; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is (7 /u {0,+,-1}<nuw><nsw><%outer.header>)
 ; CHECK-NEXT:  Loop %inner: Trip multiple is 1
 ; CHECK-NEXT:  Loop %outer.header: backedge-taken count is i64 0
 ; CHECK-NEXT:  Loop %outer.header: constant max backedge-taken count is i64 0
@@ -600,9 +600,9 @@ exit:
 define void @step_is_neg_addrec_slt_var(i32 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_slt_var'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_slt_var
-; CHECK-NEXT:  Loop %inner: backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nsw><%outer.header> smax %n))
+; CHECK-NEXT:  Loop %inner: backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))
 ; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is i32 2147483647
-; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nsw><%outer.header> smax %n))
+; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({0,+,1}<nuw><nsw><%outer.header> + ({0,+,-1}<nuw><nsw><%outer.header> smax %n))
 ; CHECK-NEXT:  Loop %inner: Trip multiple is 1
 ; CHECK-NEXT:  Loop %outer.header: backedge-taken count is i64 0
 ; CHECK-NEXT:  Loop %outer.header: constant max backedge-taken count is i64 0
@@ -638,9 +638,9 @@ exit:
 define void @step_is_neg_addrec_unknown_start(i32 %n) {
 ; CHECK-LABEL: 'step_is_neg_addrec_unknown_start'
 ; CHECK-NEXT:  Determining loop execution counts for: @step_is_neg_addrec_unknown_start
-; CHECK-NEXT:  Loop %inner: backedge-taken count is ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nsw><%outer.header>))
+; CHECK-NEXT:  Loop %inner: backedge-taken count is ({(-1 * %n),+,1}<nuw><nsw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))
 ; CHECK-NEXT:  Loop %inner: constant max backedge-taken count is i32 -2147483640
-; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({(-1 * %n),+,1}<nw><%outer.header> + (8 smax {%n,+,-1}<nsw><%outer.header>))
+; CHECK-NEXT:  Loop %inner: symbolic max backedge-taken count is ({(-1 * %n),+,1}<nuw><nsw><%outer.header> + (8 smax {%n,+,-1}<nuw><nsw><%outer.header>))
 ; CHECK-NEXT:  Loop %inner: Trip multiple is 1
 ; CHECK-NEXT:  Loop %outer.header: backedge-taken count is i64 0
 ; CHECK-NEXT:  Loop %outer.header: constant max backedge-taken count is i64 0
diff --git a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll
index fbdbefb875fba..b97afae894621 100644
--- a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll
@@ -77,9 +77,9 @@ define void @uge_sext_x_zext_x(i32 %len) {
 ; CHECK-NEXT:    %len.sext = sext i32 %len to i64
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
 ; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %sel = select i1 %cmp1, i64 %len.zext, i64 %len.sext
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @uge_sext_x_zext_x
@@ -145,9 +145,9 @@ define void @ugt_sext_x_zext_x(i32 %len) {
 ; CHECK-NEXT:    %len.sext = sext i32 %len to i64
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
 ; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umax = select i1 %cmp1, i64 %len.zext, i64 %len.sext
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @ugt_sext_x_zext_x
@@ -247,9 +247,9 @@ define void @slt_sext_x_zext_x(i32 %len) {
 ; CHECK-NEXT:    %len.sext = sext i32 %len to i64
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648)
 ; CHECK-NEXT:    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i64 %iv, 1
-; CHECK-NEXT:    --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {1,+,1}<nw><%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64))<nsw> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %umin = select i1 %cmp1, i64 %len.zext, i64 %len.sext
 ; CHECK-NEXT:    --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant }
 ; CHECK-NEXT:  Determining loop execution counts for: @slt_sext_x_zext_x
diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 935a8c6fb6c39..7aa7dde4296a7 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -641,46 +641,44 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-NEXT:    cmpwi r4, 1
 ; CHECK-NEXT:    bltlr cr0
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    clrldi r4, r4, 32
-; CHECK-NEXT:    li r6, 0
-; CHECK-NEXT:    mtctr r4
-; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    clrldi r6, r4, 32
+; CHECK-NEXT:    addi r4, r5, 48
+; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:    mtctr r6
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB9_2: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    rldic r7, r6, 4, 28
+; CHECK-NEXT:    lxv vs0, -48(r4)
+; CHECK-NEXT:    lxv vs1, -32(r4)
 ; CHECK-NEXT:    xxsetaccz acc2
 ; CHECK-NEXT:    xxsetaccz acc1
-; CHECK-NEXT:    addi r6, r6, 6
-; CHECK-NEXT:    add r8, r5, r7
-; CHECK-NEXT:    lxvx vs0, r5, r7
-; CHECK-NEXT:    rldic r7, r4, 6, 26
-; CHECK-NEXT:    addi r4, r4, 3
-; CHECK-NEXT:    lxv vs1, 16(r8)
+; CHECK-NEXT:    rldic r6, r5, 6, 26
+; CHECK-NEXT:    addi r5, r5, 3
+; CHECK-NEXT:    add r7, r3, r6
 ; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
-; CHECK-NEXT:    lxv vs0, 32(r8)
-; CHECK-NEXT:    lxv vs1, 48(r8)
+; CHECK-NEXT:    lxv vs0, -16(r4)
+; CHECK-NEXT:    lxv vs1, 0(r4)
 ; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
-; CHECK-NEXT:    lxv vs12, 64(r8)
-; CHECK-NEXT:    lxv vs13, 80(r8)
+; CHECK-NEXT:    lxv vs12, 16(r4)
+; CHECK-NEXT:    lxv vs13, 32(r4)
 ; CHECK-NEXT:    xxsetaccz acc0
-; CHECK-NEXT:    add r8, r3, r7
-; CHECK-NEXT:    xxmfacc acc2
+; CHECK-NEXT:    addi r4, r4, 96
 ; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
-; CHECK-NEXT:    stxvx vs11, r3, r7
-; CHECK-NEXT:    stxv vs8, 48(r8)
+; CHECK-NEXT:    xxmfacc acc2
 ; CHECK-NEXT:    xxmfacc acc1
-; CHECK-NEXT:    stxv vs9, 32(r8)
-; CHECK-NEXT:    stxv vs10, 16(r8)
-; CHECK-NEXT:    stxv vs4, 112(r8)
-; CHECK-NEXT:    stxv vs5, 96(r8)
+; CHECK-NEXT:    stxvx vs11, r3, r6
+; CHECK-NEXT:    stxv vs8, 48(r7)
+; CHECK-NEXT:    stxv vs9, 32(r7)
+; CHECK-NEXT:    stxv vs10, 16(r7)
 ; CHECK-NEXT:    xxmfacc acc0
-; CHECK-NEXT:    stxv vs6, 80(r8)
-; CHECK-NEXT:    stxv vs7, 64(r8)
-; CHECK-NEXT:    stxv vs0, 176(r8)
-; CHECK-NEXT:    stxv vs1, 160(r8)
-; CHECK-NEXT:    stxv vs2, 144(r8)
-; CHECK-NEXT:    stxv vs3, 128(r8)
+; CHECK-NEXT:    stxv vs4, 112(r7)
+; CHECK-NEXT:    stxv vs5, 96(r7)
+; CHECK-NEXT:    stxv vs6, 80(r7)
+; CHECK-NEXT:    stxv vs7, 64(r7)
+; CHECK-NEXT:    stxv vs0, 176(r7)
+; CHECK-NEXT:    stxv vs1, 160(r7)
+; CHECK-NEXT:    stxv vs2, 144(r7)
+; CHECK-NEXT:    stxv vs3, 128(r7)
 ; CHECK-NEXT:    bdnz .LBB9_2
 ; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-NEXT:    blr
@@ -690,46 +688,44 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-BE-NEXT:    cmpwi r4, 1
 ; CHECK-BE-NEXT:    bltlr cr0
 ; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-BE-NEXT:    clrldi r4, r4, 32
-; CHECK-BE-NEXT:    li r6, 0
-; CHECK-BE-NEXT:    mtctr r4
-; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    clrldi r6, r4, 32
+; CHECK-BE-NEXT:    addi r4, r5, 48
+; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    mtctr r6
 ; CHECK-BE-NEXT:    .p2align 4
 ; CHECK-BE-NEXT:  .LBB9_2: # %for.body
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
+; CHECK-BE-NEXT:    lxv vs0, -48(r4)
+; CHECK-BE-NEXT:    lxv vs1, -32(r4)
 ; CHECK-BE-NEXT:    xxsetaccz acc2
 ; CHECK-BE-NEXT:    xxsetaccz acc1
-; CHECK-BE-NEXT:    addi r6, r6, 6
-; CHECK-BE-NEXT:    add r8, r5, r7
-; CHECK-BE-NEXT:    lxvx vs0, r5, r7
-; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
-; CHECK-BE-NEXT:    addi r4, r4, 3
-; CHECK-BE-NEXT:    lxv vs1, 16(r8)
+; CHECK-BE-NEXT:    rldic r6, r5, 6, 26
+; CHECK-BE-NEXT:    addi r5, r5, 3
+; CHECK-BE-NEXT:    add r7, r3, r6
 ; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
-; CHECK-BE-NEXT:    lxv vs0, 32(r8)
-; CHECK-BE-NEXT:    lxv vs1, 48(r8)
+; CHECK-BE-NEXT:    lxv vs0, -16(r4)
+; CHECK-BE-NEXT:    lxv vs1, 0(r4)
 ; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
-; CHECK-BE-NEXT:    lxv vs12, 64(r8)
-; CHECK-BE-NEXT:    lxv vs13, 80(r8)
+; CHECK-BE-NEXT:    lxv vs12, 16(r4)
+; CHECK-BE-NEXT:    lxv vs13, 32(r4)
 ; CHECK-BE-NEXT:    xxsetaccz acc0
-; CHECK-BE-NEXT:    add r8, r3, r7
-; CHECK-BE-NEXT:    xxmfacc acc2
+; CHECK-BE-NEXT:    addi r4, r4, 96
 ; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
-; CHECK-BE-NEXT:    stxvx vs8, r3, r7
-; CHECK-BE-NEXT:    stxv vs9, 16(r8)
+; CHECK-BE-NEXT:    xxmfacc acc2
 ; CHECK-BE-NEXT:    xxmfacc acc1
-; CHECK-BE-NEXT:    stxv vs11, 48(r8)
-; CHECK-BE-NEXT:    stxv vs10, 32(r8)
-; CHECK-BE-NEXT:    stxv vs5, 80(r8)
-; CHECK-BE-NEXT:    stxv vs4, 64(r8)
+; CHECK-BE-NEXT:    stxvx vs8, r3, r6
+; CHECK-BE-NEXT:    stxv vs9, 16(r7)
+; CHECK-BE-NEXT:    stxv vs11, 48(r7)
+; CHECK-BE-NEXT:    stxv vs10, 32(r7)
 ; CHECK-BE-NEXT:    xxmfacc acc0
-; CHECK-BE-NEXT:    stxv vs7, 112(r8)
-; CHECK-BE-NEXT:    stxv vs6, 96(r8)
-; CHECK-BE-NEXT:    stxv vs1, 144(r8)
-; CHECK-BE-NEXT:    stxv vs0, 128(r8)
-; CHECK-BE-NEXT:    stxv vs3, 176(r8)
-; CHECK-BE-NEXT:    stxv vs2, 160(r8)
+; CHECK-BE-NEXT:    stxv vs5, 80(r7)
+; CHECK-BE-NEXT:    stxv vs4, 64(r7)
+; CHECK-BE-NEXT:    stxv vs7, 112(r7)
+; CHECK-BE-NEXT:    stxv vs6, 96(r7)
+; CHECK-BE-NEXT:    stxv vs1, 144(r7)
+; CHECK-BE-NEXT:    stxv vs0, 128(r7)
+; CHECK-BE-NEXT:    stxv vs3, 176(r7)
+; CHECK-BE-NEXT:    stxv vs2, 160(r7)
 ; CHECK-BE-NEXT:    bdnz .LBB9_2
 ; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-BE-NEXT:    blr
@@ -741,44 +737,42 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-WACC-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-WACC-NEXT:    clrldi r4, r4, 32
 ; CHECK-WACC-NEXT:    mtctr r4
-; CHECK-WACC-NEXT:    li r4, 0
-; CHECK-WACC-NEXT:    li r6, 0
+; CHECK-WACC-NEXT:    addi r4, r5, 48
+; CHECK-WACC-NEXT:    li r5, 0
 ; CHECK-WACC-NEXT:    .p2align 4
 ; CHECK-WACC-NEXT:  .LBB9_2: # %for.body
 ; CHECK-WACC-NEXT:    #
-; CHECK-WACC-NEXT:    rldic r7, r6, 4, 28
-; CHECK-WACC-NEXT:    add r8, r5, r7
-; CHECK-WACC-NEXT:    lxvx vs0, r5, r7
-; CHECK-WACC-NEXT:    lxv vs1, 16(r8)
+; CHECK-WACC-NEXT:    lxv vs0, -48(r4)
+; CHECK-WACC-NEXT:    lxv vs1, -32(r4)
 ; CHECK-WACC-NEXT:    dmxxsetaccz wacc2
 ; CHECK-WACC-NEXT:    dmxxsetaccz wacc1
 ; CHECK-WACC-NEXT:    dmxxsetaccz wacc0
 ; CHECK-WACC-NEXT:    xvf32gerpp wacc2, vs0, vs1
-; CHECK-WACC-NEXT:    lxv vs0, 32(r8)
-; CHECK-WACC-NEXT:    lxv vs1, 48(r8)
-; CHECK-WACC-NEXT:    rldic r7, r4, 6, 26
-; CHECK-WACC-NEXT:    addi r4, r4, 3
-; CHECK-WACC-NEXT:    addi r6, r6, 6
+; CHECK-WACC-NEXT:    lxv vs0, -16(r4)
+; CHECK-WACC-NEXT:    lxv vs1, 0(r4)
+; CHECK-WACC-NEXT:    rldic r6, r5, 6, 26
+; CHECK-WACC-NEXT:    add r7, r3, r6
+; CHECK-WACC-NEXT:    addi r5, r5, 3
 ; CHECK-WACC-NEXT:    xvf32gerpn wacc1, vs0, vs1
-; CHECK-WACC-NEXT:    lxv vs0, 64(r8)
-; CHECK-WACC-NEXT:    lxv vs1, 80(r8)
-; CHECK-WACC-NEXT:    add r8, r3, r7
+; CHECK-WACC-NEXT:    lxv vs0, 16(r4)
+; CHECK-WACC-NEXT:    lxv vs1, 32(r4)
+; CHECK-WACC-NEXT:    addi r4, r4, 96
 ; CHECK-WACC-NEXT:    xvf32gernp wacc0, vs0, vs1
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc2, 0
-; CHECK-WACC-NEXT:    stxvx v3, r3, r7
-; CHECK-WACC-NEXT:    stxv v4, 48(r8)
-; CHECK-WACC-NEXT:    stxv v5, 32(r8)
-; CHECK-WACC-NEXT:    stxv v2, 16(r8)
+; CHECK-WACC-NEXT:    stxvx v3, r3, r6
+; CHECK-WACC-NEXT:    stxv v4, 48(r7)
+; CHECK-WACC-NEXT:    stxv v5, 32(r7)
+; CHECK-WACC-NEXT:    stxv v2, 16(r7)
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
-; CHECK-WACC-NEXT:    stxv v4, 112(r8)
-; CHECK-WACC-NEXT:    stxv v5, 96(r8)
-; CHECK-WACC-NEXT:    stxv v2, 80(r8)
-; CHECK-WACC-NEXT:    stxv v3, 64(r8)
+; CHECK-WACC-NEXT:    stxv v4, 112(r7)
+; CHECK-WACC-NEXT:    stxv v5, 96(r7)
+; CHECK-WACC-NEXT:    stxv v2, 80(r7)
+; CHECK-WACC-NEXT:    stxv v3, 64(r7)
 ; CHECK-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-WACC-NEXT:    stxv v4, 176(r8)
-; CHECK-WACC-NEXT:    stxv v5, 160(r8)
-; CHECK-WACC-NEXT:    stxv v2, 144(r8)
-; CHECK-WACC-NEXT:    stxv v3, 128(r8)
+; CHECK-WACC-NEXT:    stxv v4, 176(r7)
+; CHECK-WACC-NEXT:    stxv v5, 160(r7)
+; CHECK-WACC-NEXT:    stxv v2, 144(r7)
+; CHECK-WACC-NEXT:    stxv v3, 128(r7)
 ; CHECK-WACC-NEXT:    bdnz .LBB9_2
 ; CHECK-WACC-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-WACC-NEXT:    blr
@@ -790,44 +784,42 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) {
 ; CHECK-BE-WACC-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-BE-WACC-NEXT:    clrldi r4, r4, 32
 ; CHECK-BE-WACC-NEXT:    mtctr r4
-; CHECK-BE-WACC-NEXT:    li r4, 0
-; CHECK-BE-WACC-NEXT:    li r6, 0
+; CHECK-BE-WACC-NEXT:    addi r4, r5, 48
+; CHECK-BE-WACC-NEXT:    li r5, 0
 ; CHECK-BE-WACC-NEXT:    .p2align 4
 ; CHECK-BE-WACC-NEXT:  .LBB9_2: # %for.body
 ; CHECK-BE-WACC-NEXT:    #
-; CHECK-BE-WACC-NEXT:    rldic r7, r6, 4, 28
-; CHECK-BE-WACC-NEXT:    add r8, r5, r7
-; CHECK-BE-WACC-NEXT:    lxvx vs0, r5, r7
-; CHECK-BE-WACC-NEXT:    lxv vs1, 16(r8)
+; CHECK-BE-WACC-NEXT:    lxv vs0, -48(r4)
+; CHECK-BE-WACC-NEXT:    lxv vs1, -32(r4)
 ; CHECK-BE-WACC-NEXT:    dmxxsetaccz wacc2
 ; CHECK-BE-WACC-NEXT:    dmxxsetaccz wacc1
 ; CHECK-BE-WACC-NEXT:    dmxxsetaccz wacc0
 ; CHECK-BE-WACC-NEXT:    xvf32gerpp wacc2, vs0, vs1
-; CHECK-BE-WACC-NEXT:    lxv vs0, 32(r8)
-; CHECK-BE-WACC-NEXT:    lxv vs1, 48(r8)
-; CHECK-BE-WACC-NEXT:    rldic r7, r4, 6, 26
-; CHECK-BE-WACC-NEXT:    addi r4, r4, 3
-; CHECK-BE-WACC-NEXT:    addi r6, r6, 6
+; CHECK-BE-WACC-NEXT:    lxv vs0, -16(r4)
+; CHECK-BE-WACC-NEXT:    lxv vs1, 0(r4)
+; CHECK-BE-WACC-NEXT:    rldic r6, r5, 6, 26
+; CHECK-BE-WACC-NEXT:    add r7, r3, r6
+; CHECK-BE-WACC-NEXT:    addi r5, r5, 3
 ; CHECK-BE-WACC-NEXT:    xvf32gerpn wacc1, vs0, vs1
-; CHECK-BE-WACC-NEXT:    lxv vs0, 64(r8)
-; CHECK-BE-WACC-NEXT:    lxv vs1, 80(r8)
-; CHECK-BE-WACC-NEXT:    add r8, r3, r7
+; CHECK-BE-WACC-NEXT:    lxv vs0, 16(r4)
+; CHECK-BE-WACC-NEXT:    lxv vs1, 32(r4)
+; CHECK-BE-WACC-NEXT:    addi r4, r4, 96
 ; CHECK-BE-WACC-NEXT:    xvf32gernp wacc0, vs0, vs1
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc2, 0
-; CHECK-BE-WACC-NEXT:    stxvx v2, r3, r7
-; CHECK-BE-WACC-NEXT:    stxv v5, 48(r8)
-; CHECK-BE-WACC-NEXT:    stxv v4, 32(r8)
-; CHECK-BE-WACC-NEXT:    stxv v3, 16(r8)
+; CHECK-BE-WACC-NEXT:    stxvx v2, r3, r6
+; CHECK-BE-WACC-NEXT:    stxv v5, 48(r7)
+; CHECK-BE-WACC-NEXT:    stxv v4, 32(r7)
+; CHECK-BE-WACC-NEXT:    stxv v3, 16(r7)
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc1, 0
-; CHECK-BE-WACC-NEXT:    stxv v5, 112(r8)
-; CHECK-BE-WACC-NEXT:    stxv v4, 96(r8)
-; CHECK-BE-WACC-NEXT:    stxv v3, 80(r8)
-; CHECK-BE-WACC-NEXT:    stxv v2, 64(r8)
+; CHECK-BE-WACC-NEXT:    stxv v5, 112(r7)
+; CHECK-BE-WACC-NEXT:    stxv v4, 96(r7)
+; CHECK-BE-WACC-NEXT:    stxv v3, 80(r7)
+; CHECK-BE-WACC-NEXT:    stxv v2, 64(r7)
 ; CHECK-BE-WACC-NEXT:    dmxxextfdmr512 vsp34, vsp36, wacc0, 0
-; CHECK-BE-WACC-NEXT:    stxv v5, 176(r8)
-; CHECK-BE-WACC-NEXT:    stxv v4, 160(r8)
-; CHECK-BE-WACC-NEXT:    stxv v3, 144(r8)
-; CHECK-BE-WACC-NEXT:    stxv v2, 128(r8)
+; CHECK-BE-WACC-NEXT:    stxv v5, 176(r7)
+; CHECK-BE-WACC-NEXT:    stxv v4, 160(r7)
+; CHECK-BE-WACC-NEXT:    stxv v3, 144(r7)
+; CHECK-BE-WACC-NEXT:    stxv v2, 128(r7)
 ; CHECK-BE-WACC-NEXT:    bdnz .LBB9_2
 ; CHECK-BE-WACC-NEXT:  # %bb.3: # %for.cond.cleanup
 ; CHECK-BE-WACC-NEXT:    blr
diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll
index 3f75d0c9880f8..5eebd75411056 100644
--- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll
+++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll
@@ -15,7 +15,8 @@ define void @foo() {
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @global, i64 1), align 1
-; CHECK-NEXT:    br i1 false, label [[BB7:%.*]], label [[BB11:%.*]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt ptr getelementptr (i8, ptr @global, i64 1), getelementptr (i8, ptr @global, i64 500)
+; CHECK-NEXT:    br i1 [[TMP5]], label [[BB7:%.*]], label [[BB11:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext i8 [[TMP6]] to i64
 ; CHECK-NEXT:    br i1 true, label [[BB11]], label [[BB3]]
diff --git a/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll b/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll
index 91f3b850e0a20..c5c492ae7956a 100644
--- a/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll
+++ b/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll
@@ -12,36 +12,46 @@
 ;     end do
 ;   end do
 
-; FIXME: We currently fail to interchange this.
 define void @fixed_size_5x5(ptr noalias %A) {
 ; CHECK-LABEL: define void @fixed_size_5x5(
 ; CHECK-SAME: ptr noalias [[A:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[OUTER_HEADER:.*]]
-; CHECK:       [[OUTER_HEADER]]:
-; CHECK-NEXT:    [[I_COUNT:%.*]] = phi i64 [ 5, %[[ENTRY]] ], [ [[I_COUNT_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OUTER_LATCH]] ]
+; CHECK:       [[OUTER_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT:    br label %[[OUTER_HEADER1:.*]]
+; CHECK:       [[OUTER_HEADER1]]:
+; CHECK-NEXT:    [[I_COUNT:%.*]] = phi i64 [ [[I_COUNT_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 5, %[[OUTER_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[OUTER_LATCH]] ], [ 1, %[[OUTER_HEADER_PREHEADER]] ]
 ; CHECK-NEXT:    [[I_EXT:%.*]] = zext nneg i32 [[I]] to i64
 ; CHECK-NEXT:    [[ROW_GEP:%.*]] = getelementptr [4 x i8], ptr [[A]], i64 [[I_EXT]]
 ; CHECK-NEXT:    br label %[[INNER:.*]]
+; CHECK:       [[OUTER_HEADER]]:
+; CHECK-NEXT:    br label %[[INNER1:.*]]
+; CHECK:       [[INNER1]]:
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[INNER_SPLIT:.*]] ], [ 1, %[[OUTER_HEADER]] ]
+; CHECK-NEXT:    [[J_COUNT:%.*]] = phi i64 [ [[TMP1:%.*]], %[[INNER_SPLIT]] ], [ 5, %[[OUTER_HEADER]] ]
+; CHECK-NEXT:    br label %[[OUTER_HEADER_PREHEADER]]
 ; CHECK:       [[INNER]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i64 [ 1, %[[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], %[[INNER]] ]
-; CHECK-NEXT:    [[J_COUNT:%.*]] = phi i64 [ 5, %[[OUTER_HEADER]] ], [ [[J_COUNT_NEXT:%.*]], %[[INNER]] ]
 ; CHECK-NEXT:    [[COL_OFF:%.*]] = mul nuw nsw i64 [[J]], 20
 ; CHECK-NEXT:    [[ELT_GEP:%.*]] = getelementptr i8, ptr [[ROW_GEP]], i64 [[COL_OFF]]
 ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i8, ptr [[ELT_GEP]], i64 -24
 ; CHECK-NEXT:    [[V:%.*]] = load float, ptr [[ADDR]], align 4
 ; CHECK-NEXT:    [[INC:%.*]] = fadd contract float [[V]], 1.000000e+00
 ; CHECK-NEXT:    store float [[INC]], ptr [[ADDR]], align 4
-; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 1
-; CHECK-NEXT:    [[J_COUNT_NEXT]] = add nsw i64 [[J_COUNT]], -1
+; CHECK-NEXT:    [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[J_COUNT_NEXT:%.*]] = add nsw i64 [[J_COUNT]], -1
 ; CHECK-NEXT:    [[J_DONE:%.*]] = icmp eq i64 [[J_COUNT_NEXT]], 0
-; CHECK-NEXT:    br i1 [[J_DONE]], label %[[OUTER_LATCH]], label %[[INNER]]
+; CHECK-NEXT:    br label %[[OUTER_LATCH]]
+; CHECK:       [[INNER_SPLIT]]:
+; CHECK-NEXT:    [[TMP0]] = add nuw nsw i64 [[J]], 1
+; CHECK-NEXT:    [[TMP1]] = add nsw i64 [[J_COUNT]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[EXIT:.*]], label %[[INNER1]]
 ; CHECK:       [[OUTER_LATCH]]:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[I_COUNT_NEXT]] = add nsw i64 [[I_COUNT]], -1
 ; CHECK-NEXT:    [[I_CMP:%.*]] = icmp sgt i64 [[I_COUNT]], 1
-; CHECK-NEXT:    br i1 [[I_CMP]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[I_CMP]], label %[[OUTER_HEADER1]], label %[[INNER_SPLIT]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index 1c67b243429b5..4dcf948d4a23e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -17,33 +17,18 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; RV32-LABEL: @foo4(
 ; RV32-NEXT:  entry:
 ; RV32-NEXT:    br label [[VECTOR_MEMCHECK:%.*]]
-; RV32:       vector.scevcheck:
-; RV32-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 128, i32 624)
-; RV32-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; RV32-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; RV32-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[MUL_RESULT]]
-; RV32-NEXT:    [[TMP1:%.*]] = icmp ult ptr [[TMP0]], [[A]]
-; RV32-NEXT:    [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]]
-; RV32-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 256, i32 624)
-; RV32-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
-; RV32-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
-; RV32-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[MUL_RESULT2]]
-; RV32-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[B]]
-; RV32-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW3]]
-; RV32-NEXT:    [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]]
-; RV32-NEXT:    br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]]
 ; RV32:       vector.memcheck:
+; RV32-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 79880
 ; RV32-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i32 39940
-; RV32-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i32 79880
-; RV32-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i32 159752
-; RV32-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]]
-; RV32-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
+; RV32-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 159752
+; RV32-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
+; RV32-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]]
 ; RV32-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; RV32-NEXT:    [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]]
 ; RV32-NEXT:    [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
 ; RV32-NEXT:    [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
 ; RV32-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
-; RV32-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; RV32-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; RV32:       vector.ph:
 ; RV32-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
 ; RV32-NEXT:    [[TMP9:%.*]] = mul nuw nsw <vscale x 2 x i64> [[TMP7]], splat (i64 16)
@@ -58,22 +43,22 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; RV32-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP11]], i64 0
 ; RV32-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; RV32-NEXT:    [[TMP12:%.*]] = trunc i64 [[INDEX]] to i32
-; RV32-NEXT:    [[TMP13:%.*]] = shl i32 [[TMP12]], 6
-; RV32-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[TRIGGER]], i32 [[TMP13]]
-; RV32-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr align 4 [[TMP15]], i32 64, <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; RV32-NEXT:    [[TMP6:%.*]] = shl nuw i32 [[TMP12]], 6
+; RV32-NEXT:    [[TMP13:%.*]] = getelementptr nuw i8, ptr [[TRIGGER]], i32 [[TMP6]]
+; RV32-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr align 4 [[TMP13]], i32 64, <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]]
 ; RV32-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
 ; RV32-NEXT:    [[TMP20:%.*]] = shl i32 [[TMP12]], 8
 ; RV32-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP20]]
-; RV32-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr align 8 [[TMP25]], i32 256, <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]]
+; RV32-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <vscale x 2 x double> @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr align 8 [[TMP25]], i32 256, <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]]
 ; RV32-NEXT:    [[TMP17:%.*]] = sitofp <vscale x 2 x i32> [[WIDE_MASKED_GATHER]] to <vscale x 2 x double>
 ; RV32-NEXT:    [[TMP18:%.*]] = fadd <vscale x 2 x double> [[WIDE_MASKED_GATHER6]], [[TMP17]]
 ; RV32-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], <vscale x 2 x i64> [[VEC_IND]]
-; RV32-NEXT:    call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]]
+; RV32-NEXT:    call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[TMP18]], <vscale x 2 x ptr> align 8 [[TMP19]], <vscale x 2 x i1> [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; RV32-NEXT:    [[CURRENT_ITERATION_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
 ; RV32-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; RV32-NEXT:    [[VEC_IND_NEXT]] = add nuw nsw <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; RV32-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; RV32-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; RV32-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; RV32:       middle.block:
 ; RV32-NEXT:    br label [[FOR_END:%.*]]
 ; RV32:       scalar.ph:
@@ -96,7 +81,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; RV32:       for.inc:
 ; RV32-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16
 ; RV32-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000
-; RV32-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]]
+; RV32-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]]
 ; RV32:       for.end:
 ; RV32-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll
index fb40384873a19..a63cced82cde2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll
@@ -101,31 +101,14 @@ define void @replicate_sext(i32 %N, ptr %dst, ptr %src) #0 {
 ; CHECK-SAME: i32 [[N:%.*]], ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 40
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 16
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
 ; CHECK:       [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 12, i64 [[TMP1]])
-; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]]
-; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[N]])
 ; CHECK-NEXT:    [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[MUL_RESULT2]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 4
-; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[MUL4:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP7]])
-; CHECK-NEXT:    [[MUL_RESULT5:%.*]] = extractvalue { i64, i1 } [[MUL4]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW6:%.*]] = extractvalue { i64, i1 } [[MUL4]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP]]
-; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW6]]
-; CHECK-NEXT:    [[TMP11:%.*]] = or i1 [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
 ; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[N]] to i64
@@ -168,13 +151,13 @@ define void @replicate_sext(i32 %N, ptr %dst, ptr %src) #0 {
 ; CHECK-NEXT:    br label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL10:%.*]] = phi i32 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV_1:%.*]] = phi i32 [ [[IV_1_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV_2_EXT:%.*]] = sext i32 [[IV_2]] to i64
 ; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr nusw i32, ptr [[SRC]], i64 [[IV_2_EXT]]
 ; CHECK-NEXT:    [[L_0:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
index 9253d72a10a45..319dc7e595923 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll
@@ -223,35 +223,33 @@ define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[II]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
-; CHECK-NEXT:    [[TMP5]] = select i1 [[TMP4]], <4 x i1> [[TMP2]], <4 x i1> [[LAST_ACTIVE_MASK]]
-; CHECK-NEXT:    [[TMP6]] = select i1 [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP3]] = or <4 x i1> [[VEC_PHI1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[BROADCAST_SPLAT]], i64 0
-; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> [[TMP6]], <4 x i1> [[TMP5]], i64 [[TMP8]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP4]])
+; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP9]]
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[II]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
@@ -266,7 +264,7 @@ define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) {
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[EXIT]]:
-; CHECK-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i64 [[COND_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
index 9840d60d46200..7a2f26c43c333 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll
@@ -959,27 +959,28 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[VECTOR_BODY]]:
 ; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
 ; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4IC1-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]]
-; CHECK-VF4IC1-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
-; CHECK-VF4IC1-NEXT:    [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]]
-; CHECK-VF4IC1-NEXT:    [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT:    [[TMP2]] = or <4 x i1> [[VEC_PHI1]], [[TMP8]]
+; CHECK-VF4IC1-NEXT:    [[TMP3]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
 ; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-VF4IC1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4IC1-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
-; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 331)
+; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-VF4IC1-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP9]]
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 331
 ; CHECK-VF4IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
 ; CHECK-VF4IC1-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK-VF4IC1:       [[SCALAR_PH]]:
 ; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
 ; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[FOR_BODY]]:
 ; CHECK-VF4IC1-NEXT:    [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
@@ -993,7 +994,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-VF4IC1:       [[EXIT_LOOPEXIT]]:
-; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-VF4IC1-NEXT:    br label %[[EXIT]]
 ; CHECK-VF4IC1:       [[EXIT]]:
 ; CHECK-VF4IC1-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
@@ -1014,14 +1015,14 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4IC4:       [[VECTOR_BODY]]:
 ; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP12:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP19:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP20:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
@@ -1038,36 +1039,34 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF4IC4-NEXT:    [[TMP33:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
 ; CHECK-VF4IC4-NEXT:    [[TMP10:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
 ; CHECK-VF4IC4-NEXT:    [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
-; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
-; CHECK-VF4IC4-NEXT:    [[TMP13:%.*]] = freeze <4 x i1> [[TMP33]]
-; CHECK-VF4IC4-NEXT:    [[TMP14:%.*]] = or <4 x i1> [[TMP2]], [[TMP13]]
-; CHECK-VF4IC4-NEXT:    [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]]
-; CHECK-VF4IC4-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP14]], [[TMP15]]
-; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = freeze <4 x i1> [[TMP11]]
-; CHECK-VF4IC4-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
-; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
-; CHECK-VF4IC4-NEXT:    [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> [[LAST_ACTIVE_MASK]]
-; CHECK-VF4IC4-NEXT:    [[TMP21]] = select i1 [[TMP3]], <4 x i1> [[TMP33]], <4 x i1> [[TMP12]]
-; CHECK-VF4IC4-NEXT:    [[TMP22]] = select i1 [[TMP3]], <4 x i1> [[TMP10]], <4 x i1> [[TMP19]]
-; CHECK-VF4IC4-NEXT:    [[TMP23]] = select i1 [[TMP3]], <4 x i1> [[TMP11]], <4 x i1> [[TMP20]]
-; CHECK-VF4IC4-NEXT:    [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC4-NEXT:    [[TMP25]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
-; CHECK-VF4IC4-NEXT:    [[TMP26]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
-; CHECK-VF4IC4-NEXT:    [[TMP27]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
+; CHECK-VF4IC4-NEXT:    [[TMP16]] = or <4 x i1> [[VEC_PHI4]], [[TMP1]]
+; CHECK-VF4IC4-NEXT:    [[TMP20]] = or <4 x i1> [[VEC_PHI5]], [[TMP33]]
+; CHECK-VF4IC4-NEXT:    [[TMP21]] = or <4 x i1> [[VEC_PHI6]], [[TMP10]]
+; CHECK-VF4IC4-NEXT:    [[TMP22]] = or <4 x i1> [[VEC_PHI7]], [[TMP11]]
+; CHECK-VF4IC4-NEXT:    [[TMP12]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP33]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
+; CHECK-VF4IC4-NEXT:    [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
+; CHECK-VF4IC4-NEXT:    [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
 ; CHECK-VF4IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-VF4IC4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
 ; CHECK-VF4IC4-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4IC4-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-VF4IC4:       [[MIDDLE_BLOCK]]:
-; CHECK-VF4IC4-NEXT:    [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 331)
-; CHECK-VF4IC4-NEXT:    [[TMP30:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP25]], <4 x i1> [[TMP21]], i32 [[TMP7]])
-; CHECK-VF4IC4-NEXT:    [[TMP31:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP26]], <4 x i1> [[TMP22]], i32 [[TMP30]])
-; CHECK-VF4IC4-NEXT:    [[TMP32:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP27]], <4 x i1> [[TMP23]], i32 [[TMP31]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX11:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP14]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX12:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX11]], <4 x i32> [[TMP15]])
+; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_MINMAX12]])
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP20]], [[TMP16]]
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX13:%.*]] = or <4 x i1> [[TMP21]], [[BIN_RDX]]
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX14:%.*]] = or <4 x i1> [[TMP22]], [[BIN_RDX13]]
+; CHECK-VF4IC4-NEXT:    [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX14]])
+; CHECK-VF4IC4-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP18]]
+; CHECK-VF4IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 331
 ; CHECK-VF4IC4-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
 ; CHECK-VF4IC4-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK-VF4IC4:       [[SCALAR_PH]]:
 ; CHECK-VF4IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; CHECK-VF4IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP32]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-VF4IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
 ; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC4:       [[FOR_BODY]]:
 ; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
@@ -1081,7 +1080,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-VF4IC4:       [[EXIT_LOOPEXIT]]:
-; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP32]], %[[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-VF4IC4-NEXT:    br label %[[EXIT]]
 ; CHECK-VF4IC4:       [[EXIT]]:
 ; CHECK-VF4IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
@@ -1102,19 +1101,18 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF1IC4:       [[FOR_BODY]]:
 ; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP35:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP36:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP37:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP12:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI6:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI7:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[FOR_BODY]] ]
 ; CHECK-VF1IC4-NEXT:    [[TMP4:%.*]] = add i64 [[IV]], 1
 ; CHECK-VF1IC4-NEXT:    [[TMP5:%.*]] = add i64 [[IV]], 2
 ; CHECK-VF1IC4-NEXT:    [[TMP6:%.*]] = add i64 [[IV]], 3
 ; CHECK-VF1IC4-NEXT:    [[TMP7:%.*]] = trunc i64 [[IV]] to i32
-; CHECK-VF1IC4-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 0
 ; CHECK-VF1IC4-NEXT:    [[TMP9:%.*]] = add i32 [[TMP7]], 1
 ; CHECK-VF1IC4-NEXT:    [[TMP10:%.*]] = add i32 [[TMP7]], 2
 ; CHECK-VF1IC4-NEXT:    [[TMP11:%.*]] = add i32 [[TMP7]], 3
@@ -1130,34 +1128,31 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF1IC4-NEXT:    [[TMP21:%.*]] = icmp sgt i32 [[TMP17]], 3
 ; CHECK-VF1IC4-NEXT:    [[TMP22:%.*]] = icmp sgt i32 [[TMP18]], 3
 ; CHECK-VF1IC4-NEXT:    [[TMP23:%.*]] = icmp sgt i32 [[TMP19]], 3
-; CHECK-VF1IC4-NEXT:    [[TMP24:%.*]] = freeze i1 [[CMP1]]
-; CHECK-VF1IC4-NEXT:    [[TMP25:%.*]] = freeze i1 [[TMP21]]
-; CHECK-VF1IC4-NEXT:    [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]]
-; CHECK-VF1IC4-NEXT:    [[TMP27:%.*]] = freeze i1 [[TMP22]]
-; CHECK-VF1IC4-NEXT:    [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]]
-; CHECK-VF1IC4-NEXT:    [[TMP29:%.*]] = freeze i1 [[TMP23]]
-; CHECK-VF1IC4-NEXT:    [[TMP30:%.*]] = or i1 [[TMP28]], [[TMP29]]
-; CHECK-VF1IC4-NEXT:    [[TMP31]] = select i1 [[TMP30]], i1 [[CMP1]], i1 [[TMP12]]
-; CHECK-VF1IC4-NEXT:    [[TMP32]] = select i1 [[TMP30]], i1 [[TMP21]], i1 [[TMP1]]
-; CHECK-VF1IC4-NEXT:    [[TMP33]] = select i1 [[TMP30]], i1 [[TMP22]], i1 [[TMP2]]
-; CHECK-VF1IC4-NEXT:    [[TMP34]] = select i1 [[TMP30]], i1 [[TMP23]], i1 [[TMP3]]
-; CHECK-VF1IC4-NEXT:    [[TMP35]] = select i1 [[TMP30]], i32 [[TMP8]], i32 [[VEC_PHI]]
-; CHECK-VF1IC4-NEXT:    [[TMP36]] = select i1 [[TMP30]], i32 [[TMP9]], i32 [[VEC_PHI1]]
-; CHECK-VF1IC4-NEXT:    [[TMP37]] = select i1 [[TMP30]], i32 [[TMP10]], i32 [[VEC_PHI2]]
-; CHECK-VF1IC4-NEXT:    [[TMP38]] = select i1 [[TMP30]], i32 [[TMP11]], i32 [[VEC_PHI3]]
+; CHECK-VF1IC4-NEXT:    [[TMP28]] = or i1 [[VEC_PHI4]], [[CMP1]]
+; CHECK-VF1IC4-NEXT:    [[TMP20]] = or i1 [[VEC_PHI5]], [[TMP21]]
+; CHECK-VF1IC4-NEXT:    [[TMP29]] = or i1 [[VEC_PHI6]], [[TMP22]]
+; CHECK-VF1IC4-NEXT:    [[TMP30]] = or i1 [[VEC_PHI7]], [[TMP23]]
+; CHECK-VF1IC4-NEXT:    [[TMP27]] = select i1 [[CMP1]], i32 [[TMP7]], i32 [[VEC_PHI]]
+; CHECK-VF1IC4-NEXT:    [[TMP24]] = select i1 [[TMP21]], i32 [[TMP9]], i32 [[VEC_PHI1]]
+; CHECK-VF1IC4-NEXT:    [[TMP25]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[VEC_PHI2]]
+; CHECK-VF1IC4-NEXT:    [[TMP26]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[VEC_PHI3]]
 ; CHECK-VF1IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
 ; CHECK-VF1IC4-NEXT:    [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1IC4-NEXT:    br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-VF1IC4:       [[MIDDLE_BLOCK]]:
+; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP27]], i32 [[TMP24]])
+; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX8:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX]], i32 [[TMP25]])
+; CHECK-VF1IC4-NEXT:    [[TMP35:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX8]], i32 [[TMP26]])
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP20]], [[TMP28]]
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX10:%.*]] = or i1 [[TMP29]], [[BIN_RDX]]
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX11:%.*]] = or i1 [[TMP30]], [[BIN_RDX10]]
+; CHECK-VF1IC4-NEXT:    [[TMP31:%.*]] = freeze i1 [[BIN_RDX11]]
 ; CHECK-VF1IC4-NEXT:    [[TMP40:%.*]] = select i1 [[TMP31]], i32 [[TMP35]], i32 331
-; CHECK-VF1IC4-NEXT:    [[TMP41:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP40]]
-; CHECK-VF1IC4-NEXT:    [[TMP42:%.*]] = select i1 [[TMP33]], i32 [[TMP37]], i32 [[TMP41]]
-; CHECK-VF1IC4-NEXT:    [[TMP43:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP42]]
 ; CHECK-VF1IC4-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
 ; CHECK-VF1IC4-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK-VF1IC4:       [[SCALAR_PH]]:
 ; CHECK-VF1IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
-; CHECK-VF1IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-VF1IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP40]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
 ; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY1:.*]]
 ; CHECK-VF1IC4:       [[FOR_BODY1]]:
 ; CHECK-VF1IC4-NEXT:    [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY1]] ]
@@ -1171,7 +1166,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
 ; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
 ; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK-VF1IC4:       [[EXIT_LOOPEXIT]]:
-; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY1]] ], [ [[TMP43]], %[[MIDDLE_BLOCK]] ]
+; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY1]] ], [ [[TMP40]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-VF1IC4-NEXT:    br label %[[EXIT]]
 ; CHECK-VF1IC4:       [[EXIT]]:
 ; CHECK-VF1IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
@@ -1667,28 +1662,29 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
 ; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[VECTOR_BODY]]:
 ; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
 ; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4IC1-NEXT:    [[TMP8:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]]
-; CHECK-VF4IC1-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
-; CHECK-VF4IC1-NEXT:    [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]]
-; CHECK-VF4IC1-NEXT:    [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT:    [[TMP2]] = or <4 x i1> [[VEC_PHI1]], [[TMP8]]
+; CHECK-VF4IC1-NEXT:    [[TMP3]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
 ; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-VF4IC1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
 ; CHECK-VF4IC1-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
-; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 -1)
+; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-VF4IC1-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP9]]
+; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 -1
 ; CHECK-VF4IC1-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK-VF4IC1:       [[SCALAR_PH]]:
 ; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC1:       [[FOR_BODY]]:
 ; CHECK-VF4IC1-NEXT:    [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[TMP7]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
 ; CHECK-VF4IC1-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]]
 ; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
@@ -1709,14 +1705,14 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
 ; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4IC4:       [[VECTOR_BODY]]:
 ; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP12:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP19:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[TMP20:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
@@ -1733,37 +1729,35 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
 ; CHECK-VF4IC4-NEXT:    [[TMP9:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
 ; CHECK-VF4IC4-NEXT:    [[TMP10:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
 ; CHECK-VF4IC4-NEXT:    [[TMP11:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD6]], zeroinitializer
-; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]]
-; CHECK-VF4IC4-NEXT:    [[TMP13:%.*]] = freeze <4 x i1> [[TMP9]]
-; CHECK-VF4IC4-NEXT:    [[TMP14:%.*]] = or <4 x i1> [[TMP2]], [[TMP13]]
-; CHECK-VF4IC4-NEXT:    [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]]
-; CHECK-VF4IC4-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP14]], [[TMP15]]
-; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = freeze <4 x i1> [[TMP11]]
-; CHECK-VF4IC4-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]]
-; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
-; CHECK-VF4IC4-NEXT:    [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]]
-; CHECK-VF4IC4-NEXT:    [[TMP21]] = select i1 [[TMP3]], <4 x i1> [[TMP9]], <4 x i1> [[TMP12]]
-; CHECK-VF4IC4-NEXT:    [[TMP22]] = select i1 [[TMP3]], <4 x i1> [[TMP10]], <4 x i1> [[TMP19]]
-; CHECK-VF4IC4-NEXT:    [[TMP23]] = select i1 [[TMP3]], <4 x i1> [[TMP11]], <4 x i1> [[TMP20]]
-; CHECK-VF4IC4-NEXT:    [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC4-NEXT:    [[TMP25]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
-; CHECK-VF4IC4-NEXT:    [[TMP26]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
-; CHECK-VF4IC4-NEXT:    [[TMP27]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
+; CHECK-VF4IC4-NEXT:    [[TMP16]] = or <4 x i1> [[VEC_PHI4]], [[TMP8]]
+; CHECK-VF4IC4-NEXT:    [[TMP20]] = or <4 x i1> [[VEC_PHI5]], [[TMP9]]
+; CHECK-VF4IC4-NEXT:    [[TMP21]] = or <4 x i1> [[VEC_PHI6]], [[TMP10]]
+; CHECK-VF4IC4-NEXT:    [[TMP22]] = or <4 x i1> [[VEC_PHI7]], [[TMP11]]
+; CHECK-VF4IC4-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
+; CHECK-VF4IC4-NEXT:    [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
+; CHECK-VF4IC4-NEXT:    [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
 ; CHECK-VF4IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[IV]], 16
 ; CHECK-VF4IC4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
 ; CHECK-VF4IC4-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
 ; CHECK-VF4IC4-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF4IC4:       [[MIDDLE_BLOCK]]:
-; CHECK-VF4IC4-NEXT:    [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 -1)
-; CHECK-VF4IC4-NEXT:    [[TMP30:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP25]], <4 x i1> [[TMP21]], i32 [[TMP7]])
-; CHECK-VF4IC4-NEXT:    [[TMP31:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP26]], <4 x i1> [[TMP22]], i32 [[TMP30]])
-; CHECK-VF4IC4-NEXT:    [[TMP32:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP27]], <4 x i1> [[TMP23]], i32 [[TMP31]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX11:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP14]])
+; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX12:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX11]], <4 x i32> [[TMP15]])
+; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_MINMAX12]])
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP20]], [[TMP16]]
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX13:%.*]] = or <4 x i1> [[TMP21]], [[BIN_RDX]]
+; CHECK-VF4IC4-NEXT:    [[BIN_RDX14:%.*]] = or <4 x i1> [[TMP22]], [[BIN_RDX13]]
+; CHECK-VF4IC4-NEXT:    [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX14]])
+; CHECK-VF4IC4-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP18]]
+; CHECK-VF4IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 -1
 ; CHECK-VF4IC4-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK-VF4IC4:       [[SCALAR_PH]]:
 ; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-VF4IC4:       [[FOR_BODY]]:
 ; CHECK-VF4IC4-NEXT:    [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[TMP32]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
+; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]]
 ; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
@@ -1784,19 +1778,18 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
 ; CHECK-VF1IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF1IC4:       [[VECTOR_BODY]]:
 ; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP35:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP36:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP37:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP38:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP12:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP32:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP33:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF1IC4-NEXT:    [[TMP3:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI6:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF1IC4-NEXT:    [[VEC_PHI7:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF1IC4-NEXT:    [[TMP4:%.*]] = add i64 [[IV]], 1
 ; CHECK-VF1IC4-NEXT:    [[TMP5:%.*]] = add i64 [[IV]], 2
 ; CHECK-VF1IC4-NEXT:    [[TMP6:%.*]] = add i64 [[IV]], 3
 ; CHECK-VF1IC4-NEXT:    [[TMP7:%.*]] = trunc i64 [[IV]] to i32
-; CHECK-VF1IC4-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], 0
 ; CHECK-VF1IC4-NEXT:    [[TMP9:%.*]] = add i32 [[TMP7]], 1
 ; CHECK-VF1IC4-NEXT:    [[TMP10:%.*]] = add i32 [[TMP7]], 2
 ; CHECK-VF1IC4-NEXT:    [[TMP11:%.*]] = add i32 [[TMP7]], 3
@@ -1812,35 +1805,32 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
 ; CHECK-VF1IC4-NEXT:    [[TMP21:%.*]] = fcmp fast olt float [[TMP17]], 0.000000e+00
 ; CHECK-VF1IC4-NEXT:    [[TMP22:%.*]] = fcmp fast olt float [[TMP18]], 0.000000e+00
 ; CHECK-VF1IC4-NEXT:    [[TMP23:%.*]] = fcmp fast olt float [[TMP19]], 0.000000e+00
-; CHECK-VF1IC4-NEXT:    [[TMP24:%.*]] = freeze i1 [[CMP]]
-; CHECK-VF1IC4-NEXT:    [[TMP25:%.*]] = freeze i1 [[TMP21]]
-; CHECK-VF1IC4-NEXT:    [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]]
-; CHECK-VF1IC4-NEXT:    [[TMP27:%.*]] = freeze i1 [[TMP22]]
-; CHECK-VF1IC4-NEXT:    [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]]
-; CHECK-VF1IC4-NEXT:    [[TMP29:%.*]] = freeze i1 [[TMP23]]
-; CHECK-VF1IC4-NEXT:    [[TMP30:%.*]] = or i1 [[TMP28]], [[TMP29]]
-; CHECK-VF1IC4-NEXT:    [[TMP31]] = select i1 [[TMP30]], i1 [[CMP]], i1 [[TMP12]]
-; CHECK-VF1IC4-NEXT:    [[TMP32]] = select i1 [[TMP30]], i1 [[TMP21]], i1 [[TMP1]]
-; CHECK-VF1IC4-NEXT:    [[TMP33]] = select i1 [[TMP30]], i1 [[TMP22]], i1 [[TMP2]]
-; CHECK-VF1IC4-NEXT:    [[TMP34]] = select i1 [[TMP30]], i1 [[TMP23]], i1 [[TMP3]]
-; CHECK-VF1IC4-NEXT:    [[TMP35]] = select i1 [[TMP30]], i32 [[TMP8]], i32 [[VEC_PHI]]
-; CHECK-VF1IC4-NEXT:    [[TMP36]] = select i1 [[TMP30]], i32 [[TMP9]], i32 [[VEC_PHI1]]
-; CHECK-VF1IC4-NEXT:    [[TMP37]] = select i1 [[TMP30]], i32 [[TMP10]], i32 [[VEC_PHI2]]
-; CHECK-VF1IC4-NEXT:    [[TMP38]] = select i1 [[TMP30]], i32 [[TMP11]], i32 [[VEC_PHI3]]
+; CHECK-VF1IC4-NEXT:    [[TMP28]] = or i1 [[VEC_PHI4]], [[CMP]]
+; CHECK-VF1IC4-NEXT:    [[TMP20]] = or i1 [[VEC_PHI5]], [[TMP21]]
+; CHECK-VF1IC4-NEXT:    [[TMP29]] = or i1 [[VEC_PHI6]], [[TMP22]]
+; CHECK-VF1IC4-NEXT:    [[TMP30]] = or i1 [[VEC_PHI7]], [[TMP23]]
+; CHECK-VF1IC4-NEXT:    [[TMP27]] = select i1 [[CMP]], i32 [[TMP7]], i32 [[VEC_PHI]]
+; CHECK-VF1IC4-NEXT:    [[TMP24]] = select i1 [[TMP21]], i32 [[TMP9]], i32 [[VEC_PHI1]]
+; CHECK-VF1IC4-NEXT:    [[TMP25]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[VEC_PHI2]]
+; CHECK-VF1IC4-NEXT:    [[TMP26]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[VEC_PHI3]]
 ; CHECK-VF1IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
 ; CHECK-VF1IC4-NEXT:    [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
 ; CHECK-VF1IC4-NEXT:    br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF1IC4:       [[MIDDLE_BLOCK]]:
+; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP27]], i32 [[TMP24]])
+; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX8:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX]], i32 [[TMP25]])
+; CHECK-VF1IC4-NEXT:    [[TMP35:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX8]], i32 [[TMP26]])
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP20]], [[TMP28]]
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX10:%.*]] = or i1 [[TMP29]], [[BIN_RDX]]
+; CHECK-VF1IC4-NEXT:    [[BIN_RDX11:%.*]] = or i1 [[TMP30]], [[BIN_RDX10]]
+; CHECK-VF1IC4-NEXT:    [[TMP31:%.*]] = freeze i1 [[BIN_RDX11]]
 ; CHECK-VF1IC4-NEXT:    [[TMP40:%.*]] = select i1 [[TMP31]], i32 [[TMP35]], i32 -1
-; CHECK-VF1IC4-NEXT:    [[TMP41:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP40]]
-; CHECK-VF1IC4-NEXT:    [[TMP42:%.*]] = select i1 [[TMP33]], i32 [[TMP37]], i32 [[TMP41]]
-; CHECK-VF1IC4-NEXT:    [[TMP43:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP42]]
 ; CHECK-VF1IC4-NEXT:    br label %[[SCALAR_PH:.*]]
 ; CHECK-VF1IC4:       [[SCALAR_PH]]:
 ; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY1:.*]]
 ; CHECK-VF1IC4:       [[FOR_BODY1]]:
 ; CHECK-VF1IC4-NEXT:    [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY1]] ]
-; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[TMP43]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY1]] ]
+; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[TMP40]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY1]] ]
 ; CHECK-VF1IC4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]]
 ; CHECK-VF1IC4-NEXT:    [[TMP44:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-VF1IC4-NEXT:    [[CMP1:%.*]] = fcmp fast olt float [[TMP44]], 0.000000e+00
diff --git a/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll b/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
index b1f7e65e9dd25..dbedc28a927e6 100644
--- a/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
+++ b/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
@@ -28,7 +28,7 @@
 ; INNERMOST-NEXT:    Invalid Context:
 ; INNERMOST-NEXT:    [tmp6, p_1, p_2] -> {  : p_2 < p_1 and (tmp6 < 0 or tmp6 > 0) }
 ; INNERMOST:         p0: %tmp6
-; INNERMOST-NEXT:    p1: {0,+,(sext i32 %N to i64)}<%bb3>
+; INNERMOST-NEXT:    p1: {0,+,(sext i32 %N to i64)}<nsw><%bb3>
 ; INNERMOST-NEXT:    p2: {0,+,1}<nuw><nsw><%bb3>
 ; INNERMOST-NEXT:    Arrays {
 ; INNERMOST-NEXT:        i32 MemRef_A[*]; // Element size 4

>From 2b91b5b61a218c102e75806f5d1381a9861977d1 Mon Sep 17 00:00:00 2001
From: Tomas Matheson <Tomas.Matheson at arm.com>
Date: Wed, 24 Jun 2026 11:43:29 +0100
Subject: [PATCH 16/42] [AArch64][SVE] Use ADD/ADR instead of MUL/MLA for x*N
 (#198566)

Avoid `MUL`/`MLA` for all-active multiplies by small constants when
cheaper `ADD`/`ADR` sequences are available.

Vector multiplication (int32_t/uint32_t and int64_t/uint64_t base types) by 2,
3, 5 or 9 can be done with ADD (for 2) or ADR (for 3, 5 and 9).
Similarly, operations of the form a + x * {1,2,4,8} can use ADR.
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  51 +++
 .../AArch64/sve-intrinsics-int-arith-undef.ll |   2 +-
 .../CodeGen/AArch64/sve-mul-imm-add-adr.ll    | 372 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/sve2-histcnt.ll     |   4 +-
 4 files changed, 425 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4712406e37e6b..64dab3296dddd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1948,6 +1948,57 @@ let Predicates = [HasSVE] in {
   defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_2, 2>;
   defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_3, 3>;
 
+  // Avoid MUL/MLA for small constants where ADD/ADR forms are available.
+  multiclass sveMulMlaImmAddAdrPat<ValueType Ty, ValueType PredTy,
+                                   ValueType ScalarTy,
+                                   Instruction Add, Instruction Adr1,
+                                   Instruction Adr2, Instruction Adr3> {
+    // MUL: x*2 -> Add(x, x); x*{3,5,9} -> Adr{1,2,3}(x, x), i.e. x + (x << N).
+    def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 2))))),
+              (Add $Op, $Op)>;
+    def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 3))))),
+              (Adr1 $Op, $Op)>;
+    def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 5))))),
+              (Adr2 $Op, $Op)>;
+    def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 9))))),
+              (Adr3 $Op, $Op)>;
+
+    // MLA: acc + x*{2,4,8} -> Adr{1,2,3}(acc, x), i.e. acc + (x << N).
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 2))))),
+              (Adr1 $Acc, $Op)>;
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 4))))),
+              (Adr2 $Acc, $Op)>;
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op,
+                                (Ty (splat_vector (ScalarTy 8))))),
+              (Adr3 $Acc, $Op)>;
+
+    // MLA commuted. These can be removed if the commuted forms are canonicalized.
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc,
+                                (Ty (splat_vector (ScalarTy 2))), Ty:$Op)),
+              (Adr1 $Acc, $Op)>;
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc,
+                                (Ty (splat_vector (ScalarTy 4))), Ty:$Op)),
+              (Adr2 $Acc, $Op)>;
+    def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc,
+                                (Ty (splat_vector (ScalarTy 8))), Ty:$Op)),
+              (Adr3 $Acc, $Op)>;
+  }
+
+  let AddedComplexity = 10 in {
+    defm : sveMulMlaImmAddAdrPat<nxv2i64, nxv2i1, i64,
+                              ADD_ZZZ_D, ADR_LSL_ZZZ_D_1,
+                              ADR_LSL_ZZZ_D_2, ADR_LSL_ZZZ_D_3>;
+    defm : sveMulMlaImmAddAdrPat<nxv4i32, nxv4i1, i32,
+                              ADD_ZZZ_S, ADR_LSL_ZZZ_S_1,
+                              ADR_LSL_ZZZ_S_2, ADR_LSL_ZZZ_S_3>;
+  }
+
   // adr z0.d, [z0.d, z0.d, uxtw #<shift>]
   // adr z0.d, [z0.d, z0.d, sxtw #<shift>]
   multiclass adrXtwShiftPat<ValueType Ty, ValueType PredTy, int ShiftAmt> {
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll
index 82b39785a07b5..1446f6956cf04 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll
@@ -301,7 +301,7 @@ define <vscale x 8 x i16> @mul_imm_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 define <vscale x 4 x i32> @mul_imm_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
 ; CHECK-LABEL: mul_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul z0.s, z0.s, #5
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #2]
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
diff --git a/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll b/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll
new file mode 100644
index 0000000000000..91423fc1d068d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll
@@ -0,0 +1,372 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+
+define <vscale x 4 x i32> @mul_i32_by_2(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: mul_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 4 x i32> %x, splat(i32 2)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @mul_i32_by_3(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: mul_i32_by_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #1]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 4 x i32> %x, splat(i32 3)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @mul_i32_by_5(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: mul_i32_by_5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #2]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 4 x i32> %x, splat(i32 5)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @mul_i32_by_9(<vscale x 4 x i32> %x) {
+; CHECK-LABEL: mul_i32_by_9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #3]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 4 x i32> %x, splat(i32 9)
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 2 x i64> @mul_i64_by_2_commuted(<vscale x 2 x i64> %x) {
+; CHECK-LABEL: mul_i64_by_2_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl z0.d, z0.d, #1
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 2 x i64> splat(i64 2), %x
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @mul_i64_by_3_commuted(<vscale x 2 x i64> %x) {
+; CHECK-LABEL: mul_i64_by_3_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z0.d, lsl #1]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 2 x i64> splat(i64 3), %x
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @mul_i64_by_5_commuted(<vscale x 2 x i64> %x) {
+; CHECK-LABEL: mul_i64_by_5_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z0.d, lsl #2]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 2 x i64> splat(i64 5), %x
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @mul_i64_by_9_commuted(<vscale x 2 x i64> %x) {
+; CHECK-LABEL: mul_i64_by_9_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z0.d, lsl #3]
+; CHECK-NEXT:    ret
+  %out = mul <vscale x 2 x i64> splat(i64 9), %x
+  ret <vscale x 2 x i64> %out
+}
+
+
+define <vscale x 4 x i32> @mla_i32_by_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: mla_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #1]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 4 x i32> %x, splat(i32 2)
+  %out = add <vscale x 4 x i32> %a, %mul
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @mla_i32_by_4(<vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: mla_i32_by_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #2]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 4 x i32> %x, splat(i32 4)
+  %out = add <vscale x 4 x i32> %a, %mul
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @mla_i32_by_8(<vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: mla_i32_by_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #3]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 4 x i32> %x, splat(i32 8)
+  %out = add <vscale x 4 x i32> %a, %mul
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 2 x i64> @mla_i64_by_2_commuted(<vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: mla_i64_by_2_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #1]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 2 x i64> splat(i64 2), %x
+  %out = add <vscale x 2 x i64> %a, %mul
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @mla_i64_by_4_commuted(<vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: mla_i64_by_4_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #2]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 2 x i64> splat(i64 4), %x
+  %out = add <vscale x 2 x i64> %a, %mul
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @mla_i64_by_8_commuted(<vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: mla_i64_by_8_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #3]
+; CHECK-NEXT:    ret
+  %mul = mul <vscale x 2 x i64> splat(i64 8), %x
+  %out = add <vscale x 2 x i64> %a, %mul
+  ret <vscale x 2 x i64> %out
+}
+
+
+define <vscale x 4 x i32> @svmul_u_i32_by_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 2))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_3(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #1]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 3))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_5(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #2]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 5))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_9(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z0.s, lsl #3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 9))
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 4 x i32> @svmul_m_partial_i32_by_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_m_partial_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #2 // =0x2
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 2))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_m_partial_i32_by_3(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_m_partial_i32_by_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #3 // =0x3
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 3))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_m_partial_i32_by_5(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_m_partial_i32_by_5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #5 // =0x5
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 5))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_m_partial_i32_by_9(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_m_partial_i32_by_9:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #9 // =0x9
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 9))
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 4 x i32> @svmul_u_i32_by_2_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_2_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #2 // =0x2
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 2), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_3_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_3_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #3 // =0x3
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 3), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_5_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_5_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #5 // =0x5
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 5), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmul_u_i32_by_9_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmul_u_i32_by_9_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #9 // =0x9
+; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat(i32 9), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 4 x i32> @svmla_u_i32_by_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #1]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 2))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_u_i32_by_4(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #2]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 4))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_u_i32_by_8(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 8))
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 4 x i32> @svmla_u_i32_by_2_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_2_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #1]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat(i32 2), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_u_i32_by_4_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_4_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #2]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat(i32 4), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_u_i32_by_8_commuted(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_u_i32_by_8_commuted:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.s, [z0.s, z1.s, lsl #3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat(i32 8), <vscale x 4 x i32> %x)
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 4 x i32> @svmla_m_partial_i32_by_2(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_m_partial_i32_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #2 // =0x2
+; CHECK-NEXT:    mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 2))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_m_partial_i32_by_4(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_m_partial_i32_by_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #4 // =0x4
+; CHECK-NEXT:    mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 4))
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @svmla_m_partial_i32_by_8(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: svmla_m_partial_i32_by_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.s, #8 // =0x8
+; CHECK-NEXT:    mla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %x, <vscale x 4 x i32> splat(i32 8))
+  ret <vscale x 4 x i32> %out
+}
+
+
+define <vscale x 2 x i64> @svmla_u_i64_by_2(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: svmla_u_i64_by_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #1]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x, <vscale x 2 x i64> splat(i64 2))
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @svmla_u_i64_by_4(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: svmla_u_i64_by_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #2]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x, <vscale x 2 x i64> splat(i64 4))
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @svmla_u_i64_by_8(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: svmla_u_i64_by_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adr z0.d, [z0.d, z1.d, lsl #3]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %x, <vscale x 2 x i64> splat(i64 8))
+  ret <vscale x 2 x i64> %out
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index 6596abe2f105a..a49bdafb8a46a 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -147,10 +147,8 @@ define void @histogram_i16_literal_2(ptr %base, <vscale x 4 x i32> %indices, <vs
 ; CHECK-LABEL: histogram_i16_literal_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.s, p0/z, z0.s, z0.s
-; CHECK-NEXT:    mov z3.s, #2 // =0x2
 ; CHECK-NEXT:    ld1h { z2.s }, p0/z, [x0, z0.s, sxtw #1]
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    mad z1.s, p1/m, z3.s, z2.s
+; CHECK-NEXT:    adr z1.s, [z2.s, z1.s, lsl #1]
 ; CHECK-NEXT:    st1h { z1.s }, p0, [x0, z0.s, sxtw #1]
 ; CHECK-NEXT:    ret
   %buckets = getelementptr i16, ptr %base, <vscale x 4 x i32> %indices

>From 36f12f975f395cab406f161086b6a808ea0440af Mon Sep 17 00:00:00 2001
From: Kareem Ergawy <kergawy at nvidia.com>
Date: Wed, 24 Jun 2026 12:46:15 +0200
Subject: [PATCH 17/42] [FIR] Route embox + projected complex slice through
 shapeVec (#205042)

When the array_coor base is a fir.embox with a projected complex %re/%im
slice, take the shapeVec path instead of the descriptor (fir.box_dims)
path. The descriptor path iterates source-rank dims while querying the
rank-reduced embox result box, which miscompiles slices that collapse
dims (e.g. complex(:,k)%re). For embox-derived boxes the underlying
storage is contiguous, so the shape-derived layout is both correct and
the natural place to encode that static shape is available. Non-embox
boxes (rebox, assumed-shape) still go through fir.box_dims.

Co-Authored-By: Claude Sonnet 4.6 <noreply at anthropic.com>

Co-authored-by: Claude Sonnet 4.6 <noreply at anthropic.com>
---
 .../lib/Optimizer/Transforms/FIRToMemRef.cpp  | 15 +---
 .../FIRToMemRef/slice-projected.mlir          | 70 +++++--------------
 2 files changed, 21 insertions(+), 64 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
index 3f738a25ec98b..1dcc056387c48 100644
--- a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp
@@ -773,20 +773,9 @@ FIRToMemRef::convertArrayCoorOp(Operation *memOp, fir::ArrayCoorOp arrayCoorOp,
   //
   //   box_dims path: query the descriptor at runtime. Required when:
   //     (a) we have no shape information at all; or
-  //     (b) the array_coor base is a fir.box that is NOT a fir.embox result;
-  //         or a fir.box with a projected slice (layout in the descriptor); or
-  //     (c) embox cannot supply layout for this coor (non-embox box above).
-  //         getFIRConvert materializes fir.box_addr(box) -- an opaque pointer
-  //         with no layout in its type -- so strides must come from the
-  //         descriptor. This matches CodeGen XArrayCoorOp's boxed branch
-  //         (getStrideFromBox); shape/shape_shift on array_coor is
-  //         informational only (lower bounds for index translation).
-  //     Projected complex %re/%im on a bare ref uses the shapeVec path with
-  //     strides scaled by two scalar slots per complex.
-  const bool boxNeedsDescriptorStrides =
-      firMemrefIsBox && (!firMemrefIsEmbox || sliceInfo.hasProjectedSlice);
+  //     (b) the array_coor base is a fir.box that is NOT a fir.embox result.
   const bool descriptorOwnsLayout =
-      shapeVec.empty() || boxNeedsDescriptorStrides;
+      shapeVec.empty() || (firMemrefIsBox && !firMemrefIsEmbox);
   if (descriptorOwnsLayout) {
     // Plain `!fir.ref` without recoverable shape extents cannot use fir.box_*.
     if (shapeVec.empty() && !sliceInfo.hasProjectedSlice && !isDescriptor &&
diff --git a/flang/test/Transforms/FIRToMemRef/slice-projected.mlir b/flang/test/Transforms/FIRToMemRef/slice-projected.mlir
index 7d29fca000fad..0a5cb672333ed 100644
--- a/flang/test/Transforms/FIRToMemRef/slice-projected.mlir
+++ b/flang/test/Transforms/FIRToMemRef/slice-projected.mlir
@@ -29,22 +29,12 @@
 // CHECK:         [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref<!fir.array<4xcomplex<f32>>>) -> memref<4xcomplex<f32>>
 // CHECK:         [[IDX:%.*]] = arith.addi
 // CHECK:         [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex<f32>>) -> memref<4x2xf32>
-// CHECK:         %[[FWD_C_RE:.*]] = arith.constant 0 : index
-// CHECK:         %[[FWD_C_SZF32:.*]] = arith.constant 4 : index
-// CHECK:         %[[FWD_C_DIM0:.*]] = arith.constant 0 : index
-// CHECK:         [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[FWD_C_DIM0]] : (!fir.box<!fir.array<4xf32>>, index) -> (index, index, index)
-// CHECK:         [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[FWD_C_SZF32]] : index
-// Reinterpret applies the embox descriptor layout onto the scalar view:
-//   sizes[0]   = box extent (section length in f32 slots)
-//   sizes[1]   = 2 for the (re, im) pair exposed by memref<4x2xf32>
-//   strides[0] = box_dims byte_stride / sizeof(f32) (not box_elesize)
-//   strides[1] = 1 between adjacent real/imag scalars
-// Without this, memref.load would use dense strides from fir.convert only.
-// CHECK:         %[[FWD_C_PAIR:.*]] = arith.constant 2 : index
-// CHECK:         %[[FWD_C_COMP_STRIDE:.*]] = arith.constant 1 : index
-// CHECK:         %[[FWD_C_OFF:.*]] = arith.constant 0 : index
-// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[FWD_C_OFF]]], sizes: [[[BD]]#1, %[[FWD_C_PAIR]]], strides: [[[STRIDE]], %[[FWD_C_COMP_STRIDE]]] : memref<4x2xf32> to memref<?x?xf32, strided<
-// CHECK:         [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX]], %[[FWD_C_RE]]] : memref<?x?xf32, strided<
+// Reinterpret applies the shape-derived layout onto the scalar view:
+//   sizes   = [shape extent, 2 (re/im pair)]
+//   strides = [2 (one complex == two scalar slots), 1]
+// CHECK-NOT:     fir.box_dims
+// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%c0{{.*}}], sizes: [%c4{{.*}}, %c2{{.*}}], strides: [%c2{{.*}}, %c1{{.*}}] : memref<4x2xf32> to memref<?x?xf32, strided<
+// CHECK:         [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX]], %c0{{.*}}] : memref<?x?xf32, strided<
 func.func @projected_slice_fwd(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>) {
   %c1 = arith.constant 1 : index
   %c4 = arith.constant 4 : index
@@ -68,17 +58,10 @@ func.func @projected_slice_fwd(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>) {
 // CHECK:         [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref<!fir.array<4xcomplex<f32>>>) -> memref<4xcomplex<f32>>
 // CHECK:         [[IDX:%.*]] = arith.addi
 // CHECK:         [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex<f32>>) -> memref<4x2xf32>
-// CHECK:         %[[BWD_C_RE:.*]] = arith.constant 0 : index
-// CHECK:         %[[BWD_C_SZF32:.*]] = arith.constant 4 : index
-// CHECK:         %[[BWD_C_DIM0:.*]] = arith.constant 0 : index
-// CHECK:         [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[BWD_C_DIM0]] : (!fir.box<!fir.array<4xf32>>, index) -> (index, index, index)
-// CHECK:         [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[BWD_C_SZF32]] : index
 // Same reinterpret as forward; slice triple only changes [[IDX]], not strides.
-// CHECK:         %[[BWD_C_PAIR:.*]] = arith.constant 2 : index
-// CHECK:         %[[BWD_C_COMP_STRIDE:.*]] = arith.constant 1 : index
-// CHECK:         %[[BWD_C_OFF:.*]] = arith.constant 0 : index
-// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[BWD_C_OFF]]], sizes: [[[BD]]#1, %[[BWD_C_PAIR]]], strides: [[[STRIDE]], %[[BWD_C_COMP_STRIDE]]] : memref<4x2xf32> to memref<?x?xf32, strided<
-// CHECK:         [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX]], %[[BWD_C_RE]]] : memref<?x?xf32, strided<
+// CHECK-NOT:     fir.box_dims
+// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%c0{{.*}}], sizes: [%c4{{.*}}, %c2{{.*}}], strides: [%c2{{.*}}, %c1{{.*}}] : memref<4x2xf32> to memref<?x?xf32, strided<
+// CHECK:         [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX]], %c0{{.*}}] : memref<?x?xf32, strided<
 func.func @projected_slice_bwd(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>) {
   %c1 = arith.constant 1 : index
   %c4 = arith.constant 4 : index
@@ -103,17 +86,10 @@ func.func @projected_slice_bwd(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>) {
 // CHECK:         [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref<!fir.array<4xcomplex<f32>>>) -> memref<4xcomplex<f32>>
 // CHECK:         [[IDX:%.*]] = arith.addi
 // CHECK:         [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex<f32>>) -> memref<4x2xf32>
-// CHECK:         %[[IM_C_IM:.*]] = arith.constant 1 : index
-// CHECK:         %[[IM_C_SZF32:.*]] = arith.constant 4 : index
-// CHECK:         %[[IM_C_DIM0:.*]] = arith.constant 0 : index
-// CHECK:         [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[IM_C_DIM0]] : (!fir.box<!fir.array<4xf32>>, index) -> (index, index, index)
-// CHECK:         [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[IM_C_SZF32]] : index
 // Same layout as %re; store uses component index 1 for imaginary.
-// CHECK:         %[[IM_C_PAIR:.*]] = arith.constant 2 : index
-// CHECK:         %[[IM_C_COMP_STRIDE:.*]] = arith.constant 1 : index
-// CHECK:         %[[IM_C_OFF:.*]] = arith.constant 0 : index
-// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[IM_C_OFF]]], sizes: [[[BD]]#1, %[[IM_C_PAIR]]], strides: [[[STRIDE]], %[[IM_C_COMP_STRIDE]]] : memref<4x2xf32> to memref<?x?xf32, strided<
-// CHECK:         memref.store %arg1, [[VIEW]][[[IDX]], %[[IM_C_IM]]] : memref<?x?xf32, strided<
+// CHECK-NOT:     fir.box_dims
+// CHECK:         [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%c0{{.*}}], sizes: [%c4{{.*}}, %c2{{.*}}], strides: [%c2{{.*}}, %c1{{.*}}] : memref<4x2xf32> to memref<?x?xf32, strided<
+// CHECK:         memref.store %arg1, [[VIEW]][[[IDX]], %c1{{.*}}] : memref<?x?xf32, strided<
 func.func @projected_slice_store_im(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>,
                                     %arg1: f32) {
   %c1 = arith.constant 1 : index
@@ -152,21 +128,13 @@ func.func @projected_slice_store_im(%arg0: !fir.ref<!fir.array<4xcomplex<f32>>>,
 // CHECK:           [[IDX_I:%.*]] = arith.addi
 // CHECK:           [[IDX_J:%.*]] = arith.addi
 // CHECK:           [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<3x2xcomplex<f32>>) -> memref<3x2x2xf32>
-// CHECK:           %[[D2_C_RE:.*]] = arith.constant 0 : index
-// CHECK:           %[[D2_C_SZF32:.*]] = arith.constant 4 : index
-// CHECK:           %[[D2_C_DIM1:.*]] = arith.constant 1 : index
-// CHECK:           [[BD0:%[0-9]+]]:3 = fir.box_dims %2, %[[D2_C_DIM1]] : (!fir.box<!fir.array<2x3xf32>>, index) -> (index, index, index)
-// CHECK:           [[STR0:%[0-9]+]] = arith.divsi [[BD0]]#2, %[[D2_C_SZF32]] : index
-// CHECK:           %[[D2_C_DIM0:.*]] = arith.constant 0 : index
-// CHECK:           [[BD1:%[0-9]+]]:3 = fir.box_dims %2, %[[D2_C_DIM0]] : (!fir.box<!fir.array<2x3xf32>>, index) -> (index, index, index)
-// CHECK:           [[STR1:%[0-9]+]] = arith.divsi [[BD1]]#2, %[[D2_C_SZF32]] : index
-// 2-D embox: two box_dims strides (both / sizeof(f32)), plus pair dim (2, 1).
-// Row-major memref indices are [j, i, 0] after Fortran dim reversal.
-// CHECK:           %[[D2_C_PAIR:.*]] = arith.constant 2 : index
-// CHECK:           %[[D2_C_COMP_STRIDE:.*]] = arith.constant 1 : index
-// CHECK:           %[[D2_C_OFF:.*]] = arith.constant 0 : index
-// CHECK:           [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[D2_C_OFF]]], sizes: [[[BD0]]#1, [[BD1]]#1, %[[D2_C_PAIR]]], strides: [[[STR0]], [[STR1]], %[[D2_C_COMP_STRIDE]]] : memref<3x2x2xf32> to memref<?x?x?xf32, strided<
-// CHECK:           [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX_J]], [[IDX_I]], %[[D2_C_RE]]] : memref<?x?x?xf32, strided<
+// 2-D shapeVec path: outer stride = inner_extent * 2 (pair slots), inner
+// stride = 2, pair stride = 1. Row-major memref indices are [j, i, 0] after
+// Fortran dim reversal.
+// CHECK-NOT:       fir.box_dims
+// CHECK:           [[STR0:%.*]] = arith.muli %c2{{.*}}, %c2{{.*}} : index
+// CHECK:           [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%c0{{.*}}], sizes: [%c3{{.*}}, %c2{{.*}}, %c2{{.*}}], strides: [[[STR0]], %c2{{.*}}, %c1{{.*}}] : memref<3x2x2xf32> to memref<?x?x?xf32, strided<
+// CHECK:           [[LOAD:%[0-9]+]] = memref.load [[VIEW]][[[IDX_J]], [[IDX_I]], %c0{{.*}}] : memref<?x?x?xf32, strided<
 func.func @projected_slice_2d(%arg0: !fir.ref<!fir.array<2x3xcomplex<f32>>>) {
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index

>From 98dc5c68a7672138e60e9869405aded6db10b31e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Wed, 24 Jun 2026 12:22:37 +0100
Subject: [PATCH 18/42] [VPlan] Introduce VPValue::user_empty (NFC) (#203518)

---
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 20 +++++-----
 llvm/lib/Transforms/Vectorize/VPlan.h         |  2 +-
 .../Transforms/Vectorize/VPlanAnalysis.cpp    |  4 +-
 .../Vectorize/VPlanConstruction.cpp           |  2 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 23 +++++------
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp |  2 +-
 llvm/lib/Transforms/Vectorize/VPlanValue.h    |  9 +++--
 .../Transforms/Vectorize/VPlanTest.cpp        | 40 +++++++++----------
 8 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 83333aae39cf9..5f29f329baf8c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -832,7 +832,7 @@ void VPRegionBlock::dissolveToCFGLoop() {
   auto *Header = cast<VPBasicBlock>(getEntry());
   auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
   auto *CanIV = getCanonicalIV();
-  if (CanIV->getNumUsers() > 0) {
+  if (!CanIV->user_empty()) {
     VPlan &Plan = *getPlan();
     auto *Zero = Plan.getZero(CanIV->getType());
     DebugLoc DL = CanIV->getDebugLoc();
@@ -1085,38 +1085,38 @@ bool VPlan::isOuterLoop() const {
 void VPlan::printLiveIns(raw_ostream &O) const {
   VPSlotTracker SlotTracker(this);
 
-  if (VF.getNumUsers() > 0) {
+  if (!VF.user_empty()) {
     O << "\nLive-in ";
     VF.printAsOperand(O, SlotTracker);
     O << " = VF";
   }
 
-  if (UF.getNumUsers() > 0) {
+  if (!UF.user_empty()) {
     O << "\nLive-in ";
     UF.printAsOperand(O, SlotTracker);
     O << " = UF";
   }
 
-  if (VFxUF.getNumUsers() > 0) {
+  if (!VFxUF.user_empty()) {
     O << "\nLive-in ";
     VFxUF.printAsOperand(O, SlotTracker);
     O << " = VF * UF";
   }
 
-  if (VectorTripCount.getNumUsers() > 0) {
+  if (!VectorTripCount.user_empty()) {
     O << "\nLive-in ";
     VectorTripCount.printAsOperand(O, SlotTracker);
     O << " = vector-trip-count";
   }
 
-  if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+  if (BackedgeTakenCount && !BackedgeTakenCount->user_empty()) {
     O << "\nLive-in ";
     BackedgeTakenCount->printAsOperand(O, SlotTracker);
     O << " = backedge-taken count";
   }
 
   O << "\n";
-  if (TripCount && TripCount->getNumUsers() > 0) {
+  if (TripCount && !TripCount->user_empty()) {
     if (isa<VPIRValue>(TripCount))
       O << "Live-in ";
     TripCount->printAsOperand(O, SlotTracker);
@@ -1563,11 +1563,11 @@ void VPSlotTracker::assignName(const VPValue *V) {
 }
 
 void VPSlotTracker::assignNames(const VPlan &Plan) {
-  if (Plan.VF.getNumUsers() > 0)
+  if (!Plan.VF.user_empty())
     assignName(&Plan.VF);
-  if (Plan.UF.getNumUsers() > 0)
+  if (!Plan.UF.user_empty())
     assignName(&Plan.UF);
-  if (Plan.VFxUF.getNumUsers() > 0)
+  if (!Plan.VFxUF.user_empty())
     assignName(&Plan.VFxUF);
   assignName(&Plan.VectorTripCount);
   if (Plan.BackedgeTakenCount)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 26c528a323969..f73118ac31797 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4936,7 +4936,7 @@ class VPlan {
   /// Resets the trip count for the VPlan. The caller must make sure all uses of
   /// the original trip count have been replaced.
   void resetTripCount(VPValue *NewTripCount) {
-    assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
+    assert(TripCount && NewTripCount && TripCount->user_empty() &&
            "TripCount must be set when resetting");
     TripCount = NewTripCount;
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 54f6c602f77d2..77a33339eb5f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -124,7 +124,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
   // the loop (not including non-recipe values such as arguments and
   // constants).
   SmallSetVector<VPValue *, 8> LoopInvariants;
-  if (Plan.getVectorTripCount().getNumUsers() > 0)
+  if (!Plan.getVectorTripCount().user_empty())
     LoopInvariants.insert(&Plan.getVectorTripCount());
 
   // We scan the loop in a topological order in order and assign a number to
@@ -198,7 +198,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
 
   VPValue *CanIV = LoopRegion->getCanonicalIV();
   // Note: canonical IVs are retained even if they have no users.
-  if (CanIV->getNumUsers() != 0)
+  if (!CanIV->user_empty())
     OpenIntervals.insert(CanIV);
 
   // We scan the instructions linearly and record each time that a new interval
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index bb967d3f3daf0..619fea8c10b4d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -1767,7 +1767,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
       continue;
     if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
       VPValue *DIVTC = DerivedIV->getOperand(1);
-      if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
+      if (DerivedIV->hasOneUse() && IsTC(DIVTC)) {
         auto *NewSel = MiddleBuilder.createSelect(
             AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
         DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index adcfe30ff9561..0c7ede5f81593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -492,7 +492,7 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
       });
 
       // Remove phi recipes that are unused after merging the regions.
-      if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
+      if (Phi1ToMove.getVPSingleValue()->user_empty()) {
         Phi1ToMove.eraseFromParent();
         continue;
       }
@@ -548,7 +548,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
   VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
   VPBlockUtils::connectBlocks(Pred, Exiting);
 
-  if (PredRecipe->getNumUsers() != 0) {
+  if (!PredRecipe->user_empty()) {
     auto *PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask,
                                               RecipeWithoutMask->getDebugLoc());
     Exiting->appendRecipe(PHIRecipe);
@@ -802,8 +802,7 @@ static bool isDeadRecipe(VPRecipeBase &R) {
     return false;
 
   // Recipe is dead if no user keeps the recipe alive.
-  return all_of(R.definedValues(),
-                [](VPValue *V) { return V->getNumUsers() == 0; });
+  return all_of(R.definedValues(), [](VPValue *V) { return V->user_empty(); });
 }
 
 void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
@@ -894,7 +893,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
       auto *RepR = dyn_cast<VPReplicateRecipe>(U);
       // Skip recipes that shouldn't be narrowed.
       if (!Def || !isa<VPReplicateRecipe, VPWidenRecipe>(Def) ||
-          Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
+          Def->user_empty() || !Def->getUnderlyingValue() ||
           (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
         continue;
 
@@ -2063,7 +2062,7 @@ static void simplifyBlends(VPlan &Plan) {
         // TODO: Find the most expensive mask that can be deadcoded, or a mask
         // that's used by multiple blends where it can be removed from them all.
         VPValue *Mask = Blend->getMask(I);
-        if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+        if (Mask->hasOneUse() && !match(Mask, m_False())) {
           StartIndex = I;
           break;
         }
@@ -2099,7 +2098,7 @@ static void simplifyBlends(VPlan &Plan) {
         NewBlend->setOperand(0, Inc1);
         NewBlend->setOperand(1, Inc0);
         NewBlend->setOperand(2, NewMask);
-        if (OldMask->getNumUsers() == 0)
+        if (OldMask->user_empty())
           cast<VPInstruction>(OldMask)->eraseFromParent();
       }
     }
@@ -5322,7 +5321,7 @@ void VPlanTransforms::materializeConstantVectorTripCount(
   assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
 
   VPValue *TC = Plan.getTripCount();
-  if (TC->getNumUsers() == 0)
+  if (TC->user_empty())
     return;
 
   // Skip cases for which the trip count may be non-trivial to materialize.
@@ -5351,7 +5350,7 @@ void VPlanTransforms::materializeConstantVectorTripCount(
 void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
                                                     VPBasicBlock *VectorPH) {
   VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
-  if (BTC->getNumUsers() == 0)
+  if (BTC->user_empty())
     return;
 
   VPBuilder Builder(VectorPH, VectorPH->begin());
@@ -5460,7 +5459,7 @@ void VPlanTransforms::materializeVectorTripCount(
   VPSymbolicValue &VectorTC = Plan.getVectorTripCount();
   // There's nothing to do if there are no users of the vector trip count or its
   // IR value has already been set.
-  if (VectorTC.getNumUsers() == 0 || VectorTC.getUnderlyingValue())
+  if (VectorTC.user_empty() || VectorTC.getUnderlyingValue())
     return;
 
   VPValue *TC = Plan.getTripCount();
@@ -5540,7 +5539,7 @@ void VPlanTransforms::materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH,
   VPValue &VFxUF = Plan.getVFxUF();
   // If there are no users of the runtime VF, compute VFxUF by constant folding
   // the multiplication of VF and UF.
-  if (VF.getNumUsers() == 0) {
+  if (VF.user_empty()) {
     VPValue *RuntimeVFxUF =
         Builder.createElementCount(TCTy, VFEC * Plan.getConcreteUF());
     VFxUF.replaceAllUsesWith(RuntimeVFxUF);
@@ -5695,7 +5694,7 @@ void VPlanTransforms::expandSCEVsToVPInstructions(VPlan &Plan,
   // late expansion.
   for (VPRecipeBase &R : make_early_inc_range(*Entry)) {
     auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
-    if (!ExpSCEV || ExpSCEV->getNumUsers() == 0)
+    if (!ExpSCEV || ExpSCEV->user_empty())
       continue;
     Builder.setInsertPoint(ExpSCEV);
     VPValue *Expanded = Expander.tryToExpand(ExpSCEV->getSCEV());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ae4beb5b71874..bcd17a54a3e31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -946,7 +946,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
 
       auto *DefR = cast<VPSingleDefRecipe>(&R);
       VPBuilder Builder(DefR);
-      if (DefR->getNumUsers() == 0) {
+      if (DefR->user_empty()) {
         // Create single-scalar version of DefR for all lanes.
         for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
           cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8356bcb08634f..a7aa0523ad5d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -92,7 +92,7 @@ class LLVM_ABI_FOR_TEST VPValue {
   VPValue &operator=(const VPValue &) = delete;
 
   virtual ~VPValue() {
-    assert(Users.empty() && "trying to delete a VPValue with remaining users");
+    assert(user_empty() && "trying to delete a VPValue with remaining users");
   }
 
   /// \return an ID for the concrete type of this object.
@@ -113,7 +113,7 @@ class LLVM_ABI_FOR_TEST VPValue {
   void assertNotMaterialized() const;
 
   unsigned getNumUsers() const {
-    if (Users.empty())
+    if (user_empty())
       return 0;
     assertNotMaterialized();
     return Users.size();
@@ -158,10 +158,11 @@ class LLVM_ABI_FOR_TEST VPValue {
   const_user_range users() const {
     return const_user_range(user_begin(), user_end());
   }
+  bool user_empty() const { return Users.empty(); } // NOLINT
 
   /// Returns true if the value has more than one unique user.
   bool hasMoreThanOneUniqueUser() const {
-    if (getNumUsers() == 0)
+    if (user_empty())
       return false;
 
     // Check if all users match the first user.
@@ -523,7 +524,7 @@ class VPDef {
     for (VPRecipeValue *D : to_vector(DefinedValues)) {
       assert(D->isDefinedBy(this) &&
              "all defined VPValues should point to the containing VPDef");
-      assert(D->getNumUsers() == 0 &&
+      assert(D->user_empty() &&
              "all defined VPValues should have no more users");
       delete D;
     }
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 723977595938f..2deb7c6c864b0 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -197,24 +197,24 @@ TEST_F(VPInstructionTest, setOperand) {
   VPInstruction *I1 =
       new VPInstruction(Instruction::Add, {VPV1, VPV2},
                         VPIRFlags::getDefaultFlags(Instruction::Add));
-  EXPECT_EQ(1u, VPV1->getNumUsers());
+  EXPECT_TRUE(VPV1->hasOneUse());
   EXPECT_EQ(I1, *VPV1->user_begin());
-  EXPECT_EQ(1u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV2->hasOneUse());
   EXPECT_EQ(I1, *VPV2->user_begin());
 
   // Replace operand 0 (VPV1) with VPV3.
   VPValue *VPV3 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
   I1->setOperand(0, VPV3);
-  EXPECT_EQ(0u, VPV1->getNumUsers());
-  EXPECT_EQ(1u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV1->user_empty());
+  EXPECT_TRUE(VPV2->hasOneUse());
   EXPECT_EQ(I1, *VPV2->user_begin());
-  EXPECT_EQ(1u, VPV3->getNumUsers());
+  EXPECT_TRUE(VPV3->hasOneUse());
   EXPECT_EQ(I1, *VPV3->user_begin());
 
   // Replace operand 1 (VPV2) with VPV3.
   I1->setOperand(1, VPV3);
-  EXPECT_EQ(0u, VPV1->getNumUsers());
-  EXPECT_EQ(0u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV1->user_empty());
+  EXPECT_TRUE(VPV2->user_empty());
   EXPECT_EQ(2u, VPV3->getNumUsers());
   EXPECT_EQ(I1, *VPV3->user_begin());
   EXPECT_EQ(I1, *std::next(VPV3->user_begin()));
@@ -222,13 +222,13 @@ TEST_F(VPInstructionTest, setOperand) {
   // Replace operand 0 (VPV3) with VPV4.
   VPValue *VPV4 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 4));
   I1->setOperand(0, VPV4);
-  EXPECT_EQ(1u, VPV3->getNumUsers());
+  EXPECT_TRUE(VPV3->hasOneUse());
   EXPECT_EQ(I1, *VPV3->user_begin());
   EXPECT_EQ(I1, *VPV4->user_begin());
 
   // Replace operand 1 (VPV3) with VPV4.
   I1->setOperand(1, VPV4);
-  EXPECT_EQ(0u, VPV3->getNumUsers());
+  EXPECT_TRUE(VPV3->user_empty());
   EXPECT_EQ(I1, *VPV4->user_begin());
   EXPECT_EQ(I1, *std::next(VPV4->user_begin()));
 
@@ -248,18 +248,18 @@ TEST_F(VPInstructionTest, replaceAllUsesWith) {
   VPV1->replaceAllUsesWith(VPV3);
   EXPECT_EQ(VPV3, I1->getOperand(0));
   EXPECT_EQ(VPV2, I1->getOperand(1));
-  EXPECT_EQ(0u, VPV1->getNumUsers());
-  EXPECT_EQ(1u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV1->user_empty());
+  EXPECT_TRUE(VPV2->hasOneUse());
   EXPECT_EQ(I1, *VPV2->user_begin());
-  EXPECT_EQ(1u, VPV3->getNumUsers());
+  EXPECT_TRUE(VPV3->hasOneUse());
   EXPECT_EQ(I1, *VPV3->user_begin());
 
   // Replace all uses of VPV2 with VPV3.
   VPV2->replaceAllUsesWith(VPV3);
   EXPECT_EQ(VPV3, I1->getOperand(0));
   EXPECT_EQ(VPV3, I1->getOperand(1));
-  EXPECT_EQ(0u, VPV1->getNumUsers());
-  EXPECT_EQ(0u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV1->user_empty());
+  EXPECT_TRUE(VPV2->user_empty());
   EXPECT_EQ(2u, VPV3->getNumUsers());
   EXPECT_EQ(I1, *VPV3->user_begin());
 
@@ -269,8 +269,8 @@ TEST_F(VPInstructionTest, replaceAllUsesWith) {
   EXPECT_EQ(VPV1, I1->getOperand(1));
   EXPECT_EQ(2u, VPV1->getNumUsers());
   EXPECT_EQ(I1, *VPV1->user_begin());
-  EXPECT_EQ(0u, VPV2->getNumUsers());
-  EXPECT_EQ(0u, VPV3->getNumUsers());
+  EXPECT_TRUE(VPV2->user_empty());
+  EXPECT_TRUE(VPV3->user_empty());
 
   VPInstruction *I2 =
       new VPInstruction(Instruction::Add, {VPV1, VPV2},
@@ -291,15 +291,15 @@ TEST_F(VPInstructionTest, releaseOperandsAtDeletion) {
       new VPInstruction(Instruction::Add, {VPV1, VPV2},
                         VPIRFlags::getDefaultFlags(Instruction::Add));
 
-  EXPECT_EQ(1u, VPV1->getNumUsers());
+  EXPECT_TRUE(VPV1->hasOneUse());
   EXPECT_EQ(I1, *VPV1->user_begin());
-  EXPECT_EQ(1u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV2->hasOneUse());
   EXPECT_EQ(I1, *VPV2->user_begin());
 
   delete I1;
 
-  EXPECT_EQ(0u, VPV1->getNumUsers());
-  EXPECT_EQ(0u, VPV2->getNumUsers());
+  EXPECT_TRUE(VPV1->user_empty());
+  EXPECT_TRUE(VPV2->user_empty());
 }
 
 using VPBasicBlockTest = VPlanTestBase;

>From e646fa6c1cebdddc4bdb81703c95ad5ef61c286d Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Wed, 24 Jun 2026 04:25:36 -0700
Subject: [PATCH 19/42] [Allocator] Drop the fast-path null check via a
 sentinel End (#205485)

Follow-up to #203718. Store `End` as the slab end plus 1 (and 0 for an
empty or moved-from allocator). This removes one condition from the fast
path.

For lld/ELF SymbolTable.cpp (clang++ -O3), the inlined `make<T>()` fast
path loses its `test rax, rax; je` pair; the whole TU's .text shrinks
from 14037 to 13800 bytes.

Aided by Claude Opus 4.8
---
 llvm/include/llvm/Support/Allocator.h | 30 ++++++++++++++-------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index f58f73227a2a9..92027cceea3bf 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -95,10 +95,11 @@ class BumpPtrAllocatorImpl
   // slabs as a matter of correctness.
   BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
       : AllocTy(std::move(Old.getAllocator())), CurPtr(Old.CurPtr),
-        End(Old.End), Slabs(std::move(Old.Slabs)),
+        EndSentinel(Old.EndSentinel), Slabs(std::move(Old.Slabs)),
         CustomSizedSlabs(std::move(Old.CustomSizedSlabs)),
         BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) {
-    Old.CurPtr = Old.End = nullptr;
+    Old.CurPtr = nullptr;
+    Old.EndSentinel = 0;
     Old.BytesAllocated = 0;
     Old.Slabs.clear();
     Old.CustomSizedSlabs.clear();
@@ -114,14 +115,15 @@ class BumpPtrAllocatorImpl
     DeallocateCustomSizedSlabs();
 
     CurPtr = RHS.CurPtr;
-    End = RHS.End;
+    EndSentinel = RHS.EndSentinel;
     BytesAllocated = RHS.BytesAllocated;
     RedZoneSize = RHS.RedZoneSize;
     Slabs = std::move(RHS.Slabs);
     CustomSizedSlabs = std::move(RHS.CustomSizedSlabs);
     AllocTy::operator=(std::move(RHS.getAllocator()));
 
-    RHS.CurPtr = RHS.End = nullptr;
+    RHS.CurPtr = nullptr;
+    RHS.EndSentinel = 0;
     RHS.BytesAllocated = 0;
     RHS.Slabs.clear();
     RHS.CustomSizedSlabs.clear();
@@ -141,7 +143,7 @@ class BumpPtrAllocatorImpl
     // Reset the state.
     BytesAllocated = 0;
     CurPtr = (char *)Slabs.front();
-    End = CurPtr + SlabSize;
+    EndSentinel = uintptr_t(CurPtr) + SlabSize + 1;
 
     __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0));
     DeallocateSlabs(std::next(Slabs.begin()), Slabs.end());
@@ -174,10 +176,9 @@ class BumpPtrAllocatorImpl
     assert(AllocEndPtr >= uintptr_t(CurPtr) &&
            "Alignment + Size must not overflow");
 
-    // Check if we have enough space.
-    if (LLVM_LIKELY(AllocEndPtr <= uintptr_t(End)
-                    // We can't return nullptr even for a zero-sized allocation!
-                    && CurPtr != nullptr)) {
+    // Check if we have enough space. `EndSentinel` is 0 for an empty allocator,
+    // so this also rejects a null CurPtr when `SizeToAllocate` is 0.
+    if (LLVM_LIKELY(AllocEndPtr < EndSentinel)) {
       CurPtr = reinterpret_cast<char *>(AllocEndPtr);
       // Update the allocation point of this memory block in MemorySanitizer.
       // Without this, MemorySanitizer messages for values originated from here
@@ -214,7 +215,7 @@ class BumpPtrAllocatorImpl
     // Otherwise, start a new slab and try again.
     StartNewSlab();
     uintptr_t AlignedAddr = alignAddr(CurPtr, Alignment);
-    assert(AlignedAddr + SizeToAllocate <= (uintptr_t)End &&
+    assert(AlignedAddr + SizeToAllocate < EndSentinel &&
            "Unable to allocate memory!");
     char *AlignedPtr = (char*)AlignedAddr;
     CurPtr = AlignedPtr + SizeToAllocate;
@@ -324,8 +325,9 @@ class BumpPtrAllocatorImpl
   /// This points to the next free byte in the slab.
   char *CurPtr = nullptr;
 
-  /// The end of the current slab.
-  char *End = nullptr;
+  /// One past the slab end (0 when there is no slab). +1 is so that the fast
+  /// path condition also rejects an empty allocator with a 0-size allocation.
+  uintptr_t EndSentinel = 0;
 
   /// The slabs allocated so far.
   SmallVector<void *, 4> Slabs;
@@ -352,7 +354,7 @@ class BumpPtrAllocatorImpl
   }
 
   /// Allocate a new slab and move the bump pointers over into the new
-  /// slab, modifying CurPtr and End.
+  /// slab, modifying CurPtr and EndSentinel.
   void StartNewSlab() {
     size_t AllocatedSlabSize = computeSlabSize(Slabs.size());
 
@@ -364,7 +366,7 @@ class BumpPtrAllocatorImpl
 
     Slabs.push_back(NewSlab);
     CurPtr = (char *)(NewSlab);
-    End = ((char *)NewSlab) + AllocatedSlabSize;
+    EndSentinel = uintptr_t(NewSlab) + AllocatedSlabSize + 1;
   }
 
   /// Deallocate a sequence of slabs.

>From 846793e2a57a39c90a4e57a61ea940127bbabf1a Mon Sep 17 00:00:00 2001
From: Abid Qadeer <haqadeer at amd.com>
Date: Wed, 24 Jun 2026 12:37:29 +0100
Subject: [PATCH 20/42] [flang][OpenMP][NFC] Hoist variant match-info
 construction into Semantics (#204387)

Replace the lowering-only `makeVariantMatchInfo` helper with a single
shared `semantics::omp::MakeVariantMatchInfo`. It builds the
VariantMatchInfo from a parsed context selector and returns the optional
non-constant user condition (as before). Update metadirective lowering
to use it and drop the duplicated Lower/OpenMP copy.

Selector features that variant selection cannot yet honour
(target_device selectors, and clause/extension trait properties) are not
match-info concerns, so they are kept out of `MakeVariantMatchInfo`.
Detection lives in a separate, pure helper
`FindUnsupportedSelectorFeature`; the caller diagnoses the feature in
its own terms (metadirective lowering emits a TODO) before building the
match info. `MakeVariantMatchInfo` checks the precondition. NFC for
metadirective.

Co-authored-by: Cursor

---------

Co-authored-by: Cursor <cursoragent at cursor.com>
---
 flang/include/flang/Semantics/openmp-utils.h |  40 +++++++
 flang/lib/Lower/OpenMP/OpenMP.cpp            |  32 ++++--
 flang/lib/Lower/OpenMP/Utils.cpp             | 111 -------------------
 flang/lib/Lower/OpenMP/Utils.h               |  19 +---
 flang/lib/Semantics/openmp-utils.cpp         | 107 ++++++++++++++++++
 5 files changed, 173 insertions(+), 136 deletions(-)

diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index e0358eafe487c..d2bfeca68bf84 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -163,6 +163,46 @@ std::optional<bool> GetLogicalArgument(
 std::optional<bool> IsContiguous(
     SemanticsContext &semaCtx, const parser::OmpObject &object);
 
+/// Non-constant user condition expression and source for runtime lowering.
+struct DynamicUserCondition {
+  const parser::ScalarExpr *expr;
+  parser::CharBlock source;
+};
+
+/// A context-selector feature that variant matching accepts syntactically but
+/// cannot yet honour during selection. Callers are expected to diagnose these
+/// (a lowering \c TODO or a semantic error) before calling
+/// \c MakeVariantMatchInfo, which asserts none are present.
+enum class UnsupportedSelectorFeature {
+  None,
+  /// A `target_device={...}` selector set.
+  TargetDevice,
+  /// A clause property (e.g. \c simdlen(8) in \c construct={simd(simdlen(8))})
+  /// or an extension property (e.g. \c foo(bar) in
+  /// \c implementation={my_trait(foo(bar))}).
+  ClauseOrExtensionProperty,
+};
+
+/// Scan a parsed context selector for the first feature that variant matching
+/// cannot yet honour (see \c UnsupportedSelectorFeature). Pure detection: emits
+/// no diagnostics and has no side effects on any match info.
+UnsupportedSelectorFeature FindUnsupportedSelectorFeature(
+    const parser::traits::OmpContextSelectorSpecification &ctxSel,
+    SemanticsContext &semaCtx);
+
+/// Populate \p vmi from a parsed context selector. Score modifiers are
+/// honoured (including on `condition(...)` selectors). Constant user
+/// conditions are folded into user_condition_true/false traits; a non-constant
+/// user condition is recorded as user_condition_unknown and the first such
+/// expression is returned for the caller to lower as a runtime condition.
+///
+/// The caller must first reject unsupported selector features (see
+/// \c FindUnsupportedSelectorFeature); this function asserts none are present.
+std::optional<DynamicUserCondition> MakeVariantMatchInfo(
+    llvm::omp::VariantMatchInfo &vmi,
+    const parser::traits::OmpContextSelectorSpecification &ctxSel,
+    SemanticsContext &semaCtx);
+
 std::vector<SomeExpr> GetTopLevelDesignators(const SomeExpr &expr);
 const SomeExpr *HasStorageOverlap(
     const SomeExpr &base, llvm::ArrayRef<SomeExpr> exprs);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 094cec737d481..4b83ac68ebf44 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -4786,18 +4786,18 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
 
 namespace {
 struct MetadirectiveCandidate {
-  MetadirectiveCandidate(
-      const parser::OmpDirectiveSpecification *spec,
-      llvm::omp::VariantMatchInfo vmi, bool isExplicit,
-      std::optional<DynamicUserCondition> dynamicCond = std::nullopt,
-      bool conditionShouldBeTrue = true)
+  MetadirectiveCandidate(const parser::OmpDirectiveSpecification *spec,
+                         llvm::omp::VariantMatchInfo vmi, bool isExplicit,
+                         std::optional<semantics::omp::DynamicUserCondition>
+                             dynamicCond = std::nullopt,
+                         bool conditionShouldBeTrue = true)
       : spec(spec), vmi(vmi), isExplicit(isExplicit), dynamicCond(dynamicCond),
         conditionShouldBeTrue(conditionShouldBeTrue) {}
 
   const parser::OmpDirectiveSpecification *spec = nullptr;
   llvm::omp::VariantMatchInfo vmi;
   bool isExplicit = false;
-  std::optional<DynamicUserCondition> dynamicCond;
+  std::optional<semantics::omp::DynamicUserCondition> dynamicCond;
   bool conditionShouldBeTrue = true;
 };
 } // namespace
@@ -4855,9 +4855,25 @@ static void genMetadirective(lower::AbstractConverter &converter,
       const auto &ctxSel = getContextSelector(*whenClause);
       auto [spec, isExplicit] = getDirectiveVariant(*whenClause);
 
+      // METADIRECTIVE cannot yet honour some selector features that are
+      // otherwise accepted; reject them before building the match info.
+      switch (semantics::omp::FindUnsupportedSelectorFeature(ctxSel, semaCtx)) {
+      case semantics::omp::UnsupportedSelectorFeature::TargetDevice:
+        TODO(converter.genLocation(clause.source),
+             "target_device selector in METADIRECTIVE");
+        break;
+      case semantics::omp::UnsupportedSelectorFeature::
+          ClauseOrExtensionProperty:
+        TODO(converter.genLocation(clause.source),
+             "clause or extension trait matching in METADIRECTIVE");
+        break;
+      case semantics::omp::UnsupportedSelectorFeature::None:
+        break;
+      }
+
       llvm::omp::VariantMatchInfo rawVMI;
-      std::optional<DynamicUserCondition> dynamicCond = makeVariantMatchInfo(
-          rawVMI, ctxSel, semaCtx, converter.genLocation(clause.source));
+      std::optional<semantics::omp::DynamicUserCondition> dynamicCond =
+          semantics::omp::MakeVariantMatchInfo(rawVMI, ctxSel, semaCtx);
 
       if (dynamicCond) {
         constexpr llvm::omp::TraitProperty dynamicConditionTrait =
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 7a532e10f1a1e..382292b6c6c13 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -1286,117 +1286,6 @@ mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter,
                                   /*typeparams=*/mlir::ValueRange{});
 }
 
-/// Collect trait property names (vendor, kind, arch, isa, etc.) into a VMI.
-static void processTraitProperties(
-    llvm::omp::VariantMatchInfo &vmi, llvm::omp::TraitSet set,
-    llvm::omp::TraitSelector selector,
-    const std::optional<parser::OmpTraitSelector::Properties> &props,
-    llvm::APInt *scorePtr, mlir::Location loc) {
-  if (!props)
-    return;
-
-  for (const auto &prop :
-       std::get<std::list<parser::OmpTraitProperty>>(props->t)) {
-    const auto *name = std::get_if<parser::OmpTraitPropertyName>(&prop.u);
-    // Clause properties and extension properties (e.g. `simdlen(8)` in
-    // `construct={simd(simdlen(8))}`) and `foo(bar)` in
-    // `implementation={my_trait(foo(bar))}` are not matched yet.
-    if (!name)
-      TODO(loc, "clause or extension trait matching in METADIRECTIVE");
-  }
-  semantics::omp::ProcessTraitProperties(vmi, set, selector, props, scorePtr);
-}
-
-/// Process user={condition(...)} trait properties. Constant conditions are
-/// resolved to user_condition_true/false. Non-constant conditions are marked
-/// as user_condition_unknown and returned for later use in fir.if lowering.
-static std::optional<DynamicUserCondition> processUserConditionTrait(
-    llvm::omp::VariantMatchInfo &vmi,
-    const std::optional<parser::OmpTraitSelector::Properties> &props,
-    semantics::SemanticsContext &semaCtx, llvm::APInt *scorePtr) {
-  std::optional<DynamicUserCondition> dynamicCond;
-  if (!props)
-    return dynamicCond;
-
-  for (const auto &prop :
-       std::get<std::list<parser::OmpTraitProperty>>(props->t)) {
-    const auto *scalarExpr = std::get_if<parser::ScalarExpr>(&prop.u);
-    if (!scalarExpr)
-      continue;
-
-    if (auto constValue =
-            semantics::omp::EvaluateUserCondition(semaCtx, *scalarExpr)) {
-      vmi.addTrait(*constValue ? llvm::omp::TraitProperty::user_condition_true
-                               : llvm::omp::TraitProperty::user_condition_false,
-                   "<condition>", scorePtr);
-      continue;
-    }
-
-    dynamicCond = DynamicUserCondition{scalarExpr, prop.source};
-    vmi.addTrait(llvm::omp::TraitProperty::user_condition_unknown,
-                 "<condition>", scorePtr);
-  }
-
-  return dynamicCond;
-}
-
-/// Populate a VariantMatchInfo from context selector.
-/// For user conditions, attempts constant folding. Non-constant conditions
-/// are recorded as user_condition_unknown and returned for later use in
-/// fir.if lowering.
-std::optional<DynamicUserCondition>
-makeVariantMatchInfo(llvm::omp::VariantMatchInfo &vmi,
-                     const parser::modifier::OmpContextSelector &ctxSel,
-                     semantics::SemanticsContext &semaCtx, mlir::Location loc) {
-  std::optional<DynamicUserCondition> dynamicCond;
-
-  for (const auto &traitSet : ctxSel.v) {
-    using TSSName = parser::OmpTraitSetSelectorName;
-    auto setName = std::get<TSSName>(traitSet.t).v;
-    llvm::omp::TraitSet set = semantics::omp::MapTraitSet(setName);
-
-    for (const auto &trait :
-         std::get<std::list<parser::OmpTraitSelector>>(traitSet.t)) {
-      const auto &selectorName =
-          std::get<parser::OmpTraitSelectorName>(trait.t);
-      llvm::omp::TraitSelector selector =
-          semantics::omp::MapTraitSelector(selectorName, set);
-      const auto &props =
-          std::get<std::optional<parser::OmpTraitSelector::Properties>>(
-              trait.t);
-
-      // target_device selectors require runtime target device queries not yet
-      // supported.
-      if (set == llvm::omp::TraitSet::target_device)
-        TODO(loc, "target_device selector in METADIRECTIVE");
-
-      std::optional<llvm::APInt> score;
-      llvm::APInt *scorePtr =
-          semantics::omp::GetTraitScore(props, semaCtx, score);
-
-      if (selector == llvm::omp::TraitSelector::user_condition) {
-        if (std::optional<DynamicUserCondition> userCond =
-                processUserConditionTrait(vmi, props, semaCtx, scorePtr))
-          dynamicCond = userCond;
-        continue;
-      }
-
-      processTraitProperties(vmi, set, selector, props, scorePtr, loc);
-
-      if (props || set != llvm::omp::TraitSet::construct)
-        continue;
-
-      // Construct traits with no properties: the selector is the property.
-      llvm::omp::TraitProperty propKind =
-          llvm::omp::getOpenMPContextTraitPropertyForSelector(selector);
-      if (propKind != llvm::omp::TraitProperty::invalid)
-        vmi.addTrait(set, propKind, selectorName.ToString(), scorePtr);
-    }
-  }
-
-  return dynamicCond;
-}
-
 // ---------------------------------------------------------------------------
 // FlangOMPContext — shared OMPContext for metadirective variant-matching
 // ---------------------------------------------------------------------------
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index e7f477ff44739..efe6c963a3778 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -258,27 +258,12 @@ std::optional<llvm::SmallVector<mlir::Value>> getIteratorElementIndices(
 
 /// Walk the already-emitted MLIR parent operations starting from \p op and
 /// collect the implied OpenMP construct traits in outermost-to-innermost
-/// order. Used by metadirective lowering to build the `ConstructTraits` of an
-/// `OMPContext`.
+/// order. Used by metadirective lowering and declare-variant call resolution
+/// to build the `ConstructTraits` of an `OMPContext`.
 void collectEnclosingConstructTraits(
     mlir::Operation *op,
     llvm::SmallVectorImpl<llvm::omp::TraitProperty> &constructTraits);
 
-/// Non-constant user condition expression and source for runtime lowering.
-struct DynamicUserCondition {
-  const parser::ScalarExpr *expr;
-  parser::CharBlock source;
-};
-
-/// Populate \p vmi from a parsed OpenMP context selector. Constant user
-/// conditions are folded into user_condition_true/false traits. A non-constant
-/// user condition is recorded as user_condition_unknown and returned for later
-/// lowering as a runtime condition.
-std::optional<DynamicUserCondition>
-makeVariantMatchInfo(llvm::omp::VariantMatchInfo &vmi,
-                     const parser::modifier::OmpContextSelector &ctxSel,
-                     semantics::SemanticsContext &semaCtx, mlir::Location loc);
-
 /// `OMPContext` flavour used by Flang's OpenMP variant matching. Adds an
 /// ISA-trait override based on the module's target-features attribute.
 class FlangOMPContext final : public llvm::omp::OMPContext {
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index b02ef81176a63..1aa27a5fe6074 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -2230,4 +2230,111 @@ void ProcessTraitProperties(llvm::omp::VariantMatchInfo &vmi,
   }
 }
 
+UnsupportedSelectorFeature FindUnsupportedSelectorFeature(
+    const parser::traits::OmpContextSelectorSpecification &ctxSel,
+    SemanticsContext &semaCtx) {
+  for (const parser::OmpTraitSetSelector &traitSet : ctxSel.v) {
+    using TSSName = parser::OmpTraitSetSelectorName;
+    auto setName{std::get<TSSName>(traitSet.t).v};
+    if (MapTraitSet(setName) == llvm::omp::TraitSet::target_device) {
+      return UnsupportedSelectorFeature::TargetDevice;
+    }
+
+    for (const parser::OmpTraitSelector &selector :
+        std::get<std::list<parser::OmpTraitSelector>>(traitSet.t)) {
+      const auto &props{
+          std::get<std::optional<parser::OmpTraitSelector::Properties>>(
+              selector.t)};
+      if (!props) {
+        continue;
+      }
+      for (const auto &prop :
+          std::get<std::list<parser::OmpTraitProperty>>(props->t)) {
+        if (std::holds_alternative<common::Indirection<parser::OmpClause>>(
+                prop.u) ||
+            std::holds_alternative<parser::OmpTraitPropertyExtension>(prop.u)) {
+          return UnsupportedSelectorFeature::ClauseOrExtensionProperty;
+        }
+      }
+    }
+  }
+  return UnsupportedSelectorFeature::None;
+}
+
+static void AddTraitPropertiesFromSelector(llvm::omp::TraitSet set,
+    const parser::OmpTraitSelector &selector, llvm::omp::VariantMatchInfo &vmi,
+    SemanticsContext &semaCtx,
+    std::optional<DynamicUserCondition> &dynamicCond) {
+  const auto &traitName{std::get<parser::OmpTraitSelectorName>(selector.t)};
+  const auto &props{
+      std::get<std::optional<parser::OmpTraitSelector::Properties>>(
+          selector.t)};
+
+  std::optional<llvm::APInt> scoreStorage;
+  llvm::APInt *scorePtr{GetTraitScore(props, semaCtx, scoreStorage)};
+
+  // user={condition(...)}: constant-fold to user_condition_true/false. A
+  // non-constant expression is recorded as user_condition_unknown and the
+  // first such expression is captured for later runtime lowering.
+  llvm::omp::TraitSelector selectorKind{MapTraitSelector(traitName, set)};
+  if (selectorKind == llvm::omp::TraitSelector::user_condition) {
+    if (!props) {
+      return;
+    }
+    for (const auto &prop :
+        std::get<std::list<parser::OmpTraitProperty>>(props->t)) {
+      const auto *scalarExpr{std::get_if<parser::ScalarExpr>(&prop.u)};
+      if (!scalarExpr) {
+        continue;
+      }
+      if (auto constValue{EvaluateUserCondition(semaCtx, *scalarExpr)}) {
+        vmi.addTrait(set,
+            *constValue ? llvm::omp::TraitProperty::user_condition_true
+                        : llvm::omp::TraitProperty::user_condition_false,
+            "<condition>", scorePtr);
+        continue;
+      }
+      if (!dynamicCond) {
+        dynamicCond = DynamicUserCondition{scalarExpr, prop.source};
+      }
+      vmi.addTrait(set, llvm::omp::TraitProperty::user_condition_unknown,
+          "<condition>", scorePtr);
+    }
+    return;
+  }
+
+  ProcessTraitProperties(vmi, set, selectorKind, props, scorePtr);
+
+  if (props || set != llvm::omp::TraitSet::construct) {
+    return;
+  }
+
+  // Construct trait selector with no properties (e.g. `construct={simd}`):
+  // the selector itself implies the property.
+  llvm::omp::TraitProperty propKind{
+      llvm::omp::getOpenMPContextTraitPropertyForSelector(selectorKind)};
+  if (propKind != llvm::omp::TraitProperty::invalid) {
+    vmi.addTrait(set, propKind, traitName.ToString(), scorePtr);
+  }
+}
+
+std::optional<DynamicUserCondition> MakeVariantMatchInfo(
+    llvm::omp::VariantMatchInfo &vmi,
+    const parser::traits::OmpContextSelectorSpecification &ctxSel,
+    SemanticsContext &semaCtx) {
+  CHECK(FindUnsupportedSelectorFeature(ctxSel, semaCtx) ==
+      UnsupportedSelectorFeature::None);
+  std::optional<DynamicUserCondition> dynamicCond;
+  for (const parser::OmpTraitSetSelector &traitSet : ctxSel.v) {
+    using TSSName = parser::OmpTraitSetSelectorName;
+    auto setName{std::get<TSSName>(traitSet.t).v};
+    llvm::omp::TraitSet set{MapTraitSet(setName)};
+
+    for (const parser::OmpTraitSelector &selector :
+        std::get<std::list<parser::OmpTraitSelector>>(traitSet.t)) {
+      AddTraitPropertiesFromSelector(set, selector, vmi, semaCtx, dynamicCond);
+    }
+  }
+  return dynamicCond;
+}
 } // namespace Fortran::semantics::omp

>From a62cd2c465310718736956f24cd4d0d5f6cb27b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= <benicsbalazs at gmail.com>
Date: Wed, 24 Jun 2026 13:42:42 +0200
Subject: [PATCH 21/42] [analyzer][NFC] Take BugReport descriptions as Twine
 instead of StringRef (#205527)

The constructors of `BugReport`, `BasicBugReport`, and
`PathSensitiveBugReport` previously took the description (and short
description) as `StringRef`. The base class always copies into a
`std::string` member regardless, so taking `const llvm::Twine &` is
strictly more flexible at no storage cost: callers can keep passing
string literals, `StringRef`, `std::string`, `SmallString::str()`, or
`formatv(...).str()` exactly as before, and now they can also pass a
`Twine` concatenation directly without first materializing a temporary
through `SmallString` + `raw_svector_ostream` or `+`/`formatv`.

Assisted-By: claude
---
 .../Core/BugReporter/BugReporter.h            | 30 ++++++++-------
 .../Checkers/CXXDeleteChecker.cpp             | 16 ++++----
 .../Checkers/MacOSKeychainAPIChecker.cpp      | 37 ++++++++-----------
 .../Checkers/NonNullParamChecker.cpp          | 14 +++----
 .../Checkers/UndefCapturedBlockVarChecker.cpp | 17 +++------
 .../Checkers/UndefinedNewArraySizeChecker.cpp |  9 +----
 .../Checkers/UnixAPIChecker.cpp               | 10 ++---
 .../Checkers/VLASizeChecker.cpp               | 12 +++---
 clang/lib/StaticAnalyzer/Core/BugReporter.cpp |  2 +-
 9 files changed, 61 insertions(+), 86 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
index 6d2de7a27608c..51c54151ac07b 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
@@ -33,6 +33,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/iterator_range.h"
@@ -132,13 +133,13 @@ class BugReport {
   SmallVector<std::shared_ptr<PathDiagnosticNotePiece>, 4> Notes;
   SmallVector<FixItHint, 4> Fixits;
 
-  BugReport(Kind kind, const BugType &bt, StringRef desc)
+  BugReport(Kind kind, const BugType &bt, const llvm::Twine &desc)
       : BugReport(kind, bt, "", desc) {}
 
-  BugReport(Kind K, const BugType &BT, StringRef ShortDescription,
-            StringRef Description)
-      : K(K), BT(BT), ShortDescription(ShortDescription),
-        Description(Description) {}
+  BugReport(Kind K, const BugType &BT, const llvm::Twine &ShortDescription,
+            const llvm::Twine &Description)
+      : K(K), BT(BT), ShortDescription(ShortDescription.str()),
+        Description(Description.str()) {}
 
 public:
   virtual ~BugReport() = default;
@@ -252,11 +253,12 @@ class BasicBugReport : public BugReport {
   const Decl *DeclWithIssue = nullptr;
 
 public:
-  BasicBugReport(const BugType &bt, StringRef desc, PathDiagnosticLocation l)
+  BasicBugReport(const BugType &bt, const llvm::Twine &desc,
+                 PathDiagnosticLocation l)
       : BugReport(Kind::Basic, bt, desc), Location(l) {}
 
-  BasicBugReport(const BugType &BT, StringRef ShortDesc, StringRef Desc,
-                 PathDiagnosticLocation L)
+  BasicBugReport(const BugType &BT, const llvm::Twine &ShortDesc,
+                 const llvm::Twine &Desc, PathDiagnosticLocation L)
       : BugReport(Kind::Basic, BT, ShortDesc, Desc), Location(L) {}
 
   static bool classof(const BugReport *R) {
@@ -369,12 +371,12 @@ class PathSensitiveBugReport : public BugReport {
       StackHints;
 
 public:
-  PathSensitiveBugReport(const BugType &bt, StringRef desc,
+  PathSensitiveBugReport(const BugType &bt, const llvm::Twine &desc,
                          const ExplodedNode *errorNode)
       : PathSensitiveBugReport(bt, desc, desc, errorNode) {}
 
-  PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc,
-                         const ExplodedNode *errorNode)
+  PathSensitiveBugReport(const BugType &bt, const llvm::Twine &shortDesc,
+                         const llvm::Twine &desc, const ExplodedNode *errorNode)
       : PathSensitiveBugReport(bt, shortDesc, desc, errorNode,
                                /*LocationToUnique*/ {},
                                /*DeclToUnique*/ nullptr) {}
@@ -386,15 +388,15 @@ class PathSensitiveBugReport : public BugReport {
   /// to the user. This method allows to rest the location which should be used
   /// for uniquing reports. For example, memory leaks checker, could set this to
   /// the allocation site, rather then the location where the bug is reported.
-  PathSensitiveBugReport(const BugType &bt, StringRef desc,
+  PathSensitiveBugReport(const BugType &bt, const llvm::Twine &desc,
                          const ExplodedNode *errorNode,
                          PathDiagnosticLocation LocationToUnique,
                          const Decl *DeclToUnique)
       : PathSensitiveBugReport(bt, desc, desc, errorNode, LocationToUnique,
                                DeclToUnique) {}
 
-  PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc,
-                         const ExplodedNode *errorNode,
+  PathSensitiveBugReport(const BugType &bt, const llvm::Twine &shortDesc,
+                         const llvm::Twine &desc, const ExplodedNode *errorNode,
                          PathDiagnosticLocation LocationToUnique,
                          const Decl *DeclToUnique);
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp
index bfab91dd67919..0bc628eaabe8c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp
@@ -156,19 +156,17 @@ void CXXArrayDeleteChecker::checkTypedDeleteExpr(
   if (!N)
     return;
 
-  SmallString<256> Buf;
-  llvm::raw_svector_ostream OS(Buf);
-
   QualType SourceType = BaseClassRegion->getValueType();
   QualType TargetType =
       DerivedClassRegion->getSymbol()->getType()->getPointeeType();
 
-  OS << "Deleting an array of '" << TargetType.getAsString()
-     << "' objects as their base class '"
-     << SourceType.getAsString(C.getASTContext().getPrintingPolicy())
-     << "' is undefined";
-
-  auto R = std::make_unique<PathSensitiveBugReport>(BT, OS.str(), N);
+  auto R = std::make_unique<PathSensitiveBugReport>(
+      BT,
+      "Deleting an array of '" + Twine(TargetType.getAsString()) +
+          "' objects as their base class '" +
+          SourceType.getAsString(C.getASTContext().getPrintingPolicy()) +
+          "' is undefined",
+      N);
 
   // Mark region of problematic base class for later use in the BugVisitor.
   R->markInteresting(BaseClassRegion);
diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
index 5d4a8b6b24766..02794da032f2f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp
@@ -19,8 +19,6 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 using namespace clang;
@@ -226,14 +224,14 @@ void MacOSKeychainAPIChecker::
 
   if (!N)
     return;
-  SmallString<80> sbuf;
-  llvm::raw_svector_ostream os(sbuf);
   unsigned int PDeallocIdx =
-               FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx;
+      FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx;
 
-  os << "Deallocator doesn't match the allocator: '"
-     << FunctionsToTrack[PDeallocIdx].Name << "' should be used.";
-  auto Report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
+  auto Report = std::make_unique<PathSensitiveBugReport>(
+      BT,
+      "Deallocator doesn't match the allocator: '" +
+          Twine(FunctionsToTrack[PDeallocIdx].Name) + "' should be used.",
+      N);
   Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first));
   Report->addRange(ArgExpr->getSourceRange());
   markInteresting(Report.get(), AP);
@@ -269,14 +267,13 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE,
         ExplodedNode *N = C.generateNonFatalErrorNode(State);
         if (!N)
           return;
-        SmallString<128> sbuf;
-        llvm::raw_svector_ostream os(sbuf);
         unsigned int DIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx;
-        os << "Allocated data should be released before another call to "
-            << "the allocator: missing a call to '"
-            << FunctionsToTrack[DIdx].Name
-            << "'.";
-        auto Report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
+        auto Report = std::make_unique<PathSensitiveBugReport>(
+            BT,
+            "Allocated data should be released before another call to "
+            "the allocator: missing a call to '" +
+                Twine(FunctionsToTrack[DIdx].Name) + "'.",
+            N);
         Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(V));
         Report->addRange(ArgExpr->getSourceRange());
         Report->markInteresting(AS->Region);
@@ -463,10 +460,6 @@ std::unique_ptr<PathSensitiveBugReport>
 MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport(
     const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const {
   const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx];
-  SmallString<70> sbuf;
-  llvm::raw_svector_ostream os(sbuf);
-  os << "Allocated data is not released: missing a call to '"
-      << FunctionsToTrack[FI.DeallocatorIdx].Name << "'.";
 
   // Most bug reports are cached at the location where they occurred.
   // With leaks, we want to unique them by the location where they were
@@ -480,8 +473,10 @@ MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport(
         AllocStmt, C.getSourceManager(), AllocNode->getStackFrame());
 
   auto Report = std::make_unique<PathSensitiveBugReport>(
-      BT, os.str(), N, LocUsedForUniqueing,
-      AllocNode->getStackFrame()->getDecl());
+      BT,
+      "Allocated data is not released: missing a call to '" +
+          Twine(FunctionsToTrack[FI.DeallocatorIdx].Name) + "'.",
+      N, LocUsedForUniqueing, AllocNode->getStackFrame()->getDecl());
 
   Report->addVisitor(std::make_unique<SecKeychainBugVisitor>(AP.first));
   markInteresting(Report.get(), AP);
diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
index 4b55c7c49caa8..2cc633fa5649f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp
@@ -22,7 +22,6 @@
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-#include "llvm/ADT/StringExtras.h"
 
 using namespace clang;
 using namespace ento;
@@ -280,14 +279,11 @@ std::unique_ptr<PathSensitiveBugReport>
 NonNullParamChecker::genReportNullAttrNonNull(const ExplodedNode *ErrorNode,
                                               const Expr *ArgE,
                                               unsigned IdxOfArg) const {
-  llvm::SmallString<256> SBuf;
-  llvm::raw_svector_ostream OS(SBuf);
-  OS << "Null pointer passed to "
-     << IdxOfArg << llvm::getOrdinalSuffix(IdxOfArg)
-     << " parameter expecting 'nonnull'";
-
-  auto R =
-      std::make_unique<PathSensitiveBugReport>(BTAttrNonNull, SBuf, ErrorNode);
+  auto R = std::make_unique<PathSensitiveBugReport>(
+      BTAttrNonNull,
+      "Null pointer passed to " + Twine(IdxOfArg) +
+          llvm::getOrdinalSuffix(IdxOfArg) + " parameter expecting 'nonnull'",
+      ErrorNode);
   if (ArgE)
     bugreporter::trackExpressionValue(ErrorNode, ArgE, *R);
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
index 2839ef0b6d2e6..5de7daae1b10f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp
@@ -10,15 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/AST/Attr.h"
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 using namespace clang;
@@ -70,14 +68,11 @@ UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE,
     if (std::optional<UndefinedVal> V =
             state->getSVal(Var.getOriginalRegion()).getAs<UndefinedVal>()) {
       if (ExplodedNode *N = C.generateErrorNode()) {
-        // Generate a bug report.
-        SmallString<128> buf;
-        llvm::raw_svector_ostream os(buf);
-
-        os << "Variable '" << VD->getName()
-           << "' is uninitialized when captured by block";
-
-        auto R = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
+        auto R = std::make_unique<PathSensitiveBugReport>(
+            BT,
+            "Variable '" + Twine(VD->getName()) +
+                "' is uninitialized when captured by block",
+            N);
         if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD))
           R->addRange(Ex->getSourceRange());
         bugreporter::trackStoredValue(*V, VR, *R,
diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp
index f053ee887a1aa..dcab55a7e370d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp
@@ -55,13 +55,8 @@ void UndefinedNewArraySizeChecker::HandleUndefinedArrayElementCount(
     CheckerContext &C, SVal ArgVal, const Expr *Init, SourceRange Range) const {
 
   if (ExplodedNode *N = C.generateErrorNode()) {
-
-    SmallString<100> buf;
-    llvm::raw_svector_ostream os(buf);
-
-    os << "Element count in new[] is a garbage value";
-
-    auto R = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
+    auto R = std::make_unique<PathSensitiveBugReport>(
+        BT, "Element count in new[] is a garbage value", N);
     R->markInteresting(ArgVal);
     R->addRange(Range);
     bugreporter::trackExpressionValue(N, Init, *R);
diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
index 4df751d203973..e51a74f725975 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp
@@ -22,8 +22,6 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 using namespace clang;
@@ -480,11 +478,9 @@ bool UnixAPIPortabilityChecker::ReportZeroByteAllocation(
   if (!N)
     return false;
 
-  SmallString<256> S;
-  llvm::raw_svector_ostream os(S);
-  os << "Call to '" << fn_name << "' has an allocation size of 0 bytes";
-  auto report =
-      std::make_unique<PathSensitiveBugReport>(BT_mallocZero, os.str(), N);
+  auto report = std::make_unique<PathSensitiveBugReport>(
+      BT_mallocZero,
+      "Call to '" + Twine(fn_name) + "' has an allocation size of 0 bytes", N);
 
   report->addRange(arg->getSourceRange());
   bugreporter::trackExpressionValue(N, arg, *report);
diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
index d5c91a20d60b2..1d82ac0f7225a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -21,7 +21,6 @@
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
-#include "llvm/Support/raw_ostream.h"
 #include <optional>
 
 using namespace clang;
@@ -213,12 +212,11 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, ProgramStateRef State,
   if (!N)
     return;
 
-  SmallString<256> buf;
-  llvm::raw_svector_ostream os(buf);
-  os << "Declared variable-length array (VLA) ";
-  os << "has tainted (attacker controlled) size that can be 0 or negative";
-
-  auto report = std::make_unique<PathSensitiveBugReport>(TaintBT, os.str(), N);
+  auto report = std::make_unique<PathSensitiveBugReport>(
+      TaintBT,
+      "Declared variable-length array (VLA) has tainted (attacker controlled) "
+      "size that can be 0 or negative",
+      N);
   report->addRange(SizeE->getSourceRange());
   bugreporter::trackExpressionValue(N, SizeE, *report);
   // The vla size may be a complex expression where multiple memory locations
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index b6c709963501f..b609a98b0aed2 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2154,7 +2154,7 @@ LLVM_ATTRIBUTE_USED static bool isHidden(const CheckerRegistryData &Registry,
 }
 
 PathSensitiveBugReport::PathSensitiveBugReport(
-    const BugType &bt, StringRef shortDesc, StringRef desc,
+    const BugType &bt, const llvm::Twine &shortDesc, const llvm::Twine &desc,
     const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique,
     const Decl *DeclToUnique)
     : BugReport(Kind::PathSensitive, bt, shortDesc, desc), ErrorNode(errorNode),

>From 09058457eeb62365e28e4ceb40654f2359569548 Mon Sep 17 00:00:00 2001
From: "forking-google-bazel-bot[bot]"
 <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com>
Date: Wed, 24 Jun 2026 07:48:29 -0400
Subject: [PATCH 22/42] [Bazel] Fixes 25ae6ce (#205501)

This fixes 25ae6ce4801f6f6addae5079323870d4191e7531.

Co-authored-by: Google Bazel Bot <google-bazel-bot at google.com>
---
 .../libc/test/src/stdlib/BUILD.bazel                     | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
index 8d30869056ec7..8e7d6b40e2fdf 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel
@@ -150,10 +150,19 @@ libc_test(
     ],
 )
 
+libc_test_library(
+    name = "qsort_r_test_helper",
+    hdrs = ["QsortReentrantTest.h"],
+    deps = [
+        "//libc/test/UnitTest:LibcUnitTest",
+    ],
+)
+
 libc_test(
     name = "qsort_r_test",
     srcs = ["qsort_r_test.cpp"],
     deps = [
+        ":qsort_r_test_helper",
         "//libc:qsort_r",
         "//libc:types_size_t",
     ],

>From 94819d04c317d7d17e26a6938208efa856a97d24 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Wed, 24 Jun 2026 14:14:15 +0200
Subject: [PATCH 23/42] [libc++] Move constexpr/explicit macros to
 <__configuration/language.h> (#205535)

These macros are essentially a property of the language mode we're in,
so move them to `<__configuration/language.h>`.
---
 libcxx/include/__config                   | 36 -----------------------
 libcxx/include/__configuration/language.h | 36 +++++++++++++++++++++++
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index fdd0558fbec6f..714cd0fd26b36 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -238,42 +238,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0
 #  endif
 
-#  if _LIBCPP_STD_VER <= 11
-#    define _LIBCPP_EXPLICIT_SINCE_CXX14
-#  else
-#    define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit
-#  endif
-
-#  if _LIBCPP_STD_VER >= 14
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr
-#  else
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX14
-#  endif
-
-#  if _LIBCPP_STD_VER >= 17
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr
-#  else
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX17
-#  endif
-
-#  if _LIBCPP_STD_VER >= 20
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr
-#  else
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX20
-#  endif
-
-#  if _LIBCPP_STD_VER >= 23
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr
-#  else
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX23
-#  endif
-
-#  if _LIBCPP_STD_VER >= 26
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX26 constexpr
-#  else
-#    define _LIBCPP_CONSTEXPR_SINCE_CXX26
-#  endif
-
 // Thread API
 // clang-format off
 #  if _LIBCPP_HAS_THREADS &&                                                                                           \
diff --git a/libcxx/include/__configuration/language.h b/libcxx/include/__configuration/language.h
index 3137ba2ea27ef..1205934334852 100644
--- a/libcxx/include/__configuration/language.h
+++ b/libcxx/include/__configuration/language.h
@@ -56,4 +56,40 @@
 #  define _LIBCPP_HAS_CHAR8_T 1
 #endif
 
+#if _LIBCPP_STD_VER <= 11
+#  define _LIBCPP_EXPLICIT_SINCE_CXX14
+#else
+#  define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit
+#endif
+
+#if _LIBCPP_STD_VER >= 14
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr
+#else
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX14
+#endif
+
+#if _LIBCPP_STD_VER >= 17
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr
+#else
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX17
+#endif
+
+#if _LIBCPP_STD_VER >= 20
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr
+#else
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX20
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr
+#else
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX23
+#endif
+
+#if _LIBCPP_STD_VER >= 26
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX26 constexpr
+#else
+#  define _LIBCPP_CONSTEXPR_SINCE_CXX26
+#endif
+
 #endif // _LIBCPP___CONFIGURATION_LANGUAGE_H

>From 1ad2d369d18da9cb65a0d518f5a1b8bd98577d3e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Jun 2026 13:16:03 +0100
Subject: [PATCH 24/42] [X86] combineAddOfPMADDWD - use MaskedVectorIsZero
 directly instead of MaskedValueIsZero. NFC. (#205534)

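Every bit of each element is demanded, so the only thing we need to know is
whether the high element of each pair is zero; MaskedVectorIsZero answers that
directly without building an all-ones DemandedBits mask.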
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 291124bb485cb..d3729b4102c55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59814,15 +59814,12 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
 
   unsigned NumElts = VT.getVectorNumElements();
   MVT OpVT = N0.getOperand(0).getSimpleValueType();
-  APInt DemandedBits = APInt::getAllOnes(OpVT.getScalarSizeInBits());
   APInt DemandedHiElts = APInt::getSplat(2 * NumElts, APInt(2, 2));
 
-  bool Op0HiZero =
-      DAG.MaskedValueIsZero(N0.getOperand(0), DemandedBits, DemandedHiElts) ||
-      DAG.MaskedValueIsZero(N0.getOperand(1), DemandedBits, DemandedHiElts);
-  bool Op1HiZero =
-      DAG.MaskedValueIsZero(N1.getOperand(0), DemandedBits, DemandedHiElts) ||
-      DAG.MaskedValueIsZero(N1.getOperand(1), DemandedBits, DemandedHiElts);
+  bool Op0HiZero = DAG.MaskedVectorIsZero(N0.getOperand(0), DemandedHiElts) ||
+                   DAG.MaskedVectorIsZero(N0.getOperand(1), DemandedHiElts);
+  bool Op1HiZero = DAG.MaskedVectorIsZero(N1.getOperand(0), DemandedHiElts) ||
+                   DAG.MaskedVectorIsZero(N1.getOperand(1), DemandedHiElts);
 
   // TODO: Check for zero lower elements once we have actual codegen that
   // creates them.

>From 4de5455b77e177137c46876d6d4ae171d06b15fa Mon Sep 17 00:00:00 2001
From: David CARLIER <devnexen at gmail.com>
Date: Wed, 24 Jun 2026 13:25:09 +0100
Subject: [PATCH 25/42] =?UTF-8?q?[NFC]=20use=20DenseMap/SmallPtrSet=20in?=
 =?UTF-8?q?=20CacheMetrics=20and=20TailDupli=E2=80=A6=20(#205480)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Full title: [NFC] use DenseMap/SmallPtrSet in CacheMetrics and TailDuplication

Swap pointer-keyed std::unordered_map/std::set for their ADT equivalents
(DenseMap and SmallPtrSet) on hot paths.
---
 bolt/lib/Passes/CacheMetrics.cpp    | 34 ++++++++++++++---------------
 bolt/lib/Passes/TailDuplication.cpp |  2 +-
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/bolt/lib/Passes/CacheMetrics.cpp b/bolt/lib/Passes/CacheMetrics.cpp
index ccc25fc0c9f4f..8c6f3ff7c4308 100644
--- a/bolt/lib/Passes/CacheMetrics.cpp
+++ b/bolt/lib/Passes/CacheMetrics.cpp
@@ -14,7 +14,6 @@
 #include "bolt/Passes/CacheMetrics.h"
 #include "bolt/Core/BinaryBasicBlock.h"
 #include "bolt/Core/BinaryFunction.h"
-#include <unordered_map>
 
 using namespace llvm;
 using namespace bolt;
@@ -29,10 +28,9 @@ constexpr unsigned ITLBPageSize = 4096;
 constexpr unsigned ITLBEntries = 16;
 
 /// Initialize and return a position map for binary basic blocks
-void extractBasicBlockInfo(
-    const BinaryFunctionListType &BinaryFunctions,
-    std::unordered_map<BinaryBasicBlock *, uint64_t> &BBAddr,
-    std::unordered_map<BinaryBasicBlock *, uint64_t> &BBSize) {
+void extractBasicBlockInfo(const BinaryFunctionListType &BinaryFunctions,
+                           DenseMap<BinaryBasicBlock *, uint64_t> &BBAddr,
+                           DenseMap<BinaryBasicBlock *, uint64_t> &BBSize) {
 
   for (BinaryFunction *BF : BinaryFunctions) {
     const BinaryContext &BC = BF->getBinaryContext();
@@ -55,8 +53,8 @@ void extractBasicBlockInfo(
 /// (the number of fallthrough branches, the total number of branches)
 std::pair<uint64_t, uint64_t>
 calcTSPScore(const BinaryFunctionListType &BinaryFunctions,
-             const std::unordered_map<BinaryBasicBlock *, uint64_t> &BBAddr,
-             const std::unordered_map<BinaryBasicBlock *, uint64_t> &BBSize) {
+             const DenseMap<BinaryBasicBlock *, uint64_t> &BBAddr,
+             const DenseMap<BinaryBasicBlock *, uint64_t> &BBSize) {
   uint64_t Score = 0;
   uint64_t JumpCount = 0;
   for (BinaryFunction *BF : BinaryFunctions) {
@@ -94,9 +92,9 @@ using Predecessors = std::vector<std::pair<BinaryFunction *, uint64_t>>;
 
 /// Build a simplified version of the call graph: For every function, keep
 /// its callers and the frequencies of the calls
-std::unordered_map<const BinaryFunction *, Predecessors>
+DenseMap<const BinaryFunction *, Predecessors>
 extractFunctionCalls(const BinaryFunctionListType &BinaryFunctions) {
-  std::unordered_map<const BinaryFunction *, Predecessors> Calls;
+  DenseMap<const BinaryFunction *, Predecessors> Calls;
 
   for (BinaryFunction *SrcFunction : BinaryFunctions) {
     const BinaryContext &BC = SrcFunction->getBinaryContext();
@@ -139,15 +137,15 @@ extractFunctionCalls(const BinaryFunctionListType &BinaryFunctions) {
 /// is proportional to the number of samples corresponding to the functions on
 /// the page. The following procedure detects short and long calls, and
 /// estimates the expected number of cache misses for the long ones.
-double expectedCacheHitRatio(
-    const BinaryFunctionListType &BinaryFunctions,
-    const std::unordered_map<BinaryBasicBlock *, uint64_t> &BBAddr,
-    const std::unordered_map<BinaryBasicBlock *, uint64_t> &BBSize) {
-  std::unordered_map<const BinaryFunction *, Predecessors> Calls =
+double
+expectedCacheHitRatio(const BinaryFunctionListType &BinaryFunctions,
+                      const DenseMap<BinaryBasicBlock *, uint64_t> &BBAddr,
+                      const DenseMap<BinaryBasicBlock *, uint64_t> &BBSize) {
+  DenseMap<const BinaryFunction *, Predecessors> Calls =
       extractFunctionCalls(BinaryFunctions);
   // Compute 'hotness' of the functions
   double TotalSamples = 0;
-  std::unordered_map<BinaryFunction *, double> FunctionSamples;
+  DenseMap<BinaryFunction *, double> FunctionSamples;
   for (BinaryFunction *BF : BinaryFunctions) {
     double Samples = 0;
     for (std::pair<BinaryFunction *, uint64_t> Pair : Calls[BF])
@@ -158,7 +156,7 @@ double expectedCacheHitRatio(
   }
 
   // Compute 'hotness' of the pages
-  std::unordered_map<uint64_t, double> PageSamples;
+  DenseMap<uint64_t, double> PageSamples;
   for (BinaryFunction *BF : BinaryFunctions) {
     if (BF->getLayout().block_empty())
       continue;
@@ -266,8 +264,8 @@ void CacheMetrics::printAll(raw_ostream &OS,
                double(HotCodeSize) / HugePage2MB);
 
   // Stats related to expected cache performance
-  std::unordered_map<BinaryBasicBlock *, uint64_t> BBAddr;
-  std::unordered_map<BinaryBasicBlock *, uint64_t> BBSize;
+  DenseMap<BinaryBasicBlock *, uint64_t> BBAddr;
+  DenseMap<BinaryBasicBlock *, uint64_t> BBSize;
   extractBasicBlockInfo(BFs, BBAddr, BBSize);
 
   OS << "  Expected i-TLB cache hit ratio: "
diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp
index c5565fdf4a7a7..a4a5876943ec4 100644
--- a/bolt/lib/Passes/TailDuplication.cpp
+++ b/bolt/lib/Passes/TailDuplication.cpp
@@ -130,7 +130,7 @@ bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
     BinaryBasicBlock *NextBB = *Itr;
     Q.push(NextBB);
   }
-  std::set<BinaryBasicBlock *> Visited;
+  SmallPtrSet<BinaryBasicBlock *, 16> Visited;
   // Breadth first search through successive blocks and see if Reg is ever used
   // before its overwritten
   while (Q.size() > 0) {

>From 1cf4a0cb7909cb7b9e0bf15757fc77dc9b02c796 Mon Sep 17 00:00:00 2001
From: Sairudra More <sairudra60 at gmail.com>
Date: Wed, 24 Jun 2026 17:59:23 +0530
Subject: [PATCH 26/42] [flang][OpenMP] Lower task reduction modifier (#205124)

Adds Flang lowering and MLIR-to-LLVM IR translation support for the
OpenMP `task` modifier on reduction clauses for `parallel`,
`do`/`wsloop`, and `sections`.

Unsupported forms remain diagnosed/TODO-gated rather than silently
lowered.

Fixes #205123.
---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |   9 +-
 .../test/Lower/OpenMP/Todo/reduction-task.f90 |  12 -
 .../Lower/OpenMP/parallel-reduction-task.f90  |  37 +++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 158 +++++++++++--
 .../openmp-reduction-task-modifier.mlir       | 216 ++++++++++++++++++
 mlir/test/Target/LLVMIR/openmp-todo.mlir      |  28 +++
 6 files changed, 427 insertions(+), 33 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-task.f90
 create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-task.f90
 create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index eb416d103fbe0..4f19dfb98024d 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -2052,12 +2052,9 @@ bool ClauseProcessor::processReduction(
 
         auto mod = std::get<std::optional<ReductionModifier>>(clause.t);
         if (mod.has_value()) {
-          if (mod.value() == ReductionModifier::Task)
-            TODO(currentLocation, "Reduction modifier `task` is not supported");
-          else
-            result.reductionMod = mlir::omp::ReductionModifierAttr::get(
-                converter.getFirOpBuilder().getContext(),
-                translateReductionModifier(mod.value()));
+          result.reductionMod = mlir::omp::ReductionModifierAttr::get(
+              converter.getFirOpBuilder().getContext(),
+              translateReductionModifier(mod.value()));
         }
 
         ReductionProcessor rp;
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
deleted file mode 100644
index adc8de00a9b7a..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ /dev/null
@@ -1,12 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifier `task` is not supported
-subroutine reduction_task()
-  integer :: i
-  i = 0
-
-  !$omp parallel reduction(task, +:i)
-  i = i + 1
-  !$omp end parallel
-end subroutine reduction_task
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-task.f90 b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
new file mode 100644
index 0000000000000..ee46b0044249f
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-reduction-task.f90
@@ -0,0 +1,37 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+! Check that the `task` reduction modifier is lowered to the `task`
+! reduction modifier attribute on the parallel and worksharing constructs.
+
+! CHECK-LABEL: func.func @_QPreduction_task_parallel
+subroutine reduction_task_parallel()
+  integer :: i
+  i = 0
+  ! CHECK: omp.parallel reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp parallel reduction(task, +:i)
+  i = i + 1
+  !$omp end parallel
+end subroutine reduction_task_parallel
+
+! CHECK-LABEL: func.func @_QPreduction_task_do
+subroutine reduction_task_do()
+  integer :: i, j
+  i = 0
+  ! CHECK: omp.wsloop {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp do reduction(task, +:i)
+  do j = 1, 10
+    i = i + 1
+  end do
+  !$omp end do
+end subroutine reduction_task_do
+
+! CHECK-LABEL: func.func @_QPreduction_task_sections
+subroutine reduction_task_sections()
+  integer :: i
+  i = 0
+  ! CHECK: omp.sections {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref<i32>) {
+  !$omp sections reduction(task, +:i)
+  i = i + 1
+  !$omp end sections
+end subroutine reduction_task_sections
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0f954e384929a..edfa407234fa0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -392,8 +392,26 @@ static LogicalResult checkImplementationStatus(Operation &op) {
           op.getReductionSyms())
         result = todo("reduction");
     if (op.getReductionMod() &&
-        op.getReductionMod().value() != omp::ReductionModifier::defaultmod)
-      result = todo("reduction with modifier");
+        op.getReductionMod().value() != omp::ReductionModifier::defaultmod) {
+      omp::ReductionModifier mod = op.getReductionMod().value();
+      // The `task` reduction modifier is supported on the parallel and
+      // worksharing (do/for and sections) constructs. Other modifiers, and the
+      // `task` modifier on other constructs, are not yet implemented.
+      bool taskModifierSupported =
+          mod == omp::ReductionModifier::task &&
+          isa<omp::ParallelOp, omp::WsloopOp, omp::SectionsOp>(op);
+      if (!taskModifierSupported) {
+        result = todo("reduction with modifier");
+      } else if (auto byref = op.getReductionByref()) {
+        // The task reduction modifier lowering only handles non-byref
+        // reductions for now.
+        for (bool isByRef : *byref)
+          if (isByRef) {
+            result = todo("task reduction modifier with by-ref reduction");
+            break;
+          }
+      }
+    }
   };
   auto checkTaskReductionByref = [&todo](auto op, LogicalResult &result) {
     if (auto byrefAttr = op.getTaskReductionByref())
@@ -2024,6 +2042,23 @@ static bool constructIsCancellable(Operation *op) {
       .wasInterrupted();
 }
 
+// Forward declarations for the task-reduction helpers defined alongside the
+// omp.taskgroup lowering further down in this file. These are shared by the
+// `reduction(task, ...)` modifier lowering on the parallel/worksharing
+// constructs and by the omp.taskgroup / omp.taskloop.context task_reduction
+// lowering. When \p isModifier is set, `__kmpc_taskred_modifier_init` is
+// emitted (opening a task-reduction scope) instead of `__kmpc_taskred_init`,
+// with \p isWorksharing selecting the runtime `is_ws` argument.
+static llvm::Value *emitTaskReductionInitCall(
+    ArrayRef<omp::DeclareReductionOp> redDecls,
+    ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
+    llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
+    LLVM::ModuleTranslation &moduleTranslation, bool isModifier = false,
+    bool isWorksharing = false);
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+                              LLVM::ModuleTranslation &moduleTranslation);
+
 static LogicalResult
 convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation) {
@@ -2057,6 +2092,10 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
           isByRef)))
     return failure();
 
+  bool isTaskReductionMod =
+      sectionsOp.getReductionMod() == omp::ReductionModifier::task &&
+      sectionsOp.getNumReductionVars() > 0;
+
   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
 
   for (Operation &op : *sectionsOp.getRegion().begin()) {
@@ -2096,6 +2135,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
   if (sectionCBs.empty())
     return success();
 
+  // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+  // region. Participating explicit tasks accumulate into the per-thread private
+  // copies, which the worksharing reduction then combines across threads. This
+  // is emitted only after the empty-sections early return above, so it stays
+  // balanced with the matching fini emitted after the sections region.
+  if (isTaskReductionMod &&
+      !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                 "__omp_taskred_mod_", builder, allocaIP,
+                                 moduleTranslation, /*isModifier=*/true,
+                                 /*isWorksharing=*/true))
+    return sectionsOp.emitError(
+        "failed to emit task reduction modifier initialization");
+
   assert(isa<omp::SectionOp>(*sectionsOp.getRegion().op_begin()));
 
   // TODO: Perform appropriate actions according to the data-sharing
@@ -2125,6 +2177,11 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
 
   builder.restoreIP(*afterIP);
 
+  // Close the task-reduction scope before combining the worksharing copies.
+  if (isTaskReductionMod)
+    emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+                                  moduleTranslation);
+
   // Process the reductions if required.
   return createReductionsAndCleanup(
       sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -3484,15 +3541,6 @@ computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder,
   return llvm::Error::success();
 }
 
-// Forward declaration: defined alongside the taskgroup task_reduction
-// lowering further down in this file. Shared between omp.taskgroup and
-// omp.taskloop.context translation.
-static llvm::Value *emitTaskReductionInitCall(
-    ArrayRef<omp::DeclareReductionOp> redDecls,
-    ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
-    llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
-    LLVM::ModuleTranslation &moduleTranslation);
-
 // Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp,
@@ -4060,8 +4108,11 @@ emitTaskReductionCombFn(omp::DeclareReductionOp decl, StringRef baseName,
 /// \p allocaIP. \p helperNamePrefix is used to disambiguate the generated
 /// init/combiner helper symbol names between taskgroup and taskloop callers.
 ///
-/// Returns the `ptr` value produced by `__kmpc_taskred_init` (the taskgroup
-/// reduction handle), or null on failure.
+/// When \p isModifier is false, emits `__kmpc_taskred_init` and returns the
+/// `ptr` value it produces (the taskgroup reduction handle). When \p isModifier
+/// is true, emits `__kmpc_taskred_modifier_init` instead to open a
+/// task-reduction scope for a parallel or worksharing construct, passing
+/// \p isWorksharing as the runtime `is_ws` argument. Returns null on failure.
 ///
 /// Only the non-byref form is handled here. Byref reductions have already
 /// been rejected by `checkImplementationStatus`.
@@ -4069,7 +4120,8 @@ static llvm::Value *emitTaskReductionInitCall(
     ArrayRef<omp::DeclareReductionOp> redDecls,
     ArrayRef<llvm::Value *> origPtrs, StringRef helperNamePrefix,
     llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP,
-    LLVM::ModuleTranslation &moduleTranslation) {
+    LLVM::ModuleTranslation &moduleTranslation, bool isModifier,
+    bool isWorksharing) {
   assert(redDecls.size() == origPtrs.size() &&
          "expected one orig pointer per reduction decl");
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
@@ -4138,7 +4190,7 @@ static llvm::Value *emitTaskReductionInitCall(
     storeField(6, llvm::ConstantInt::get(i32Ty, 0));      // flags
   }
 
-  // Emit call: __kmpc_taskred_init(gtid, num, &arr).
+  // Emit the runtime call that registers the task reduction data.
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   uint32_t srcLocSize;
   llvm::Constant *srcLocStr =
@@ -4146,12 +4198,45 @@ static llvm::Value *emitTaskReductionInitCall(
   llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
   ompBuilder->updateToLocation(ompLoc);
   llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+  if (isModifier) {
+    // __kmpc_taskred_modifier_init(loc, gtid, is_ws, num, &arr) opens a
+    // task-reduction scope for the enclosing parallel/worksharing region.
+    llvm::FunctionCallee modInit = ompBuilder->getOrCreateRuntimeFunction(
+        *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_modifier_init);
+    return builder.CreateCall(modInit,
+                              {ident, gtid,
+                               builder.getInt32(isWorksharing ? 1 : 0),
+                               builder.getInt32(n), arrAlloca},
+                              ".taskred.desc");
+  }
+  // __kmpc_taskred_init(gtid, num, &arr).
   llvm::FunctionCallee taskredInit = ompBuilder->getOrCreateRuntimeFunction(
       *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_init);
   return builder.CreateCall(taskredInit, {gtid, builder.getInt32(n), arrAlloca},
                             ".taskred.desc");
 }
 
+/// Emits `__kmpc_task_reduction_modifier_fini(loc, gtid, is_ws)` at the current
+/// builder insertion point, closing the task-reduction scope opened by the
+/// `task` reduction modifier on a parallel or worksharing construct.
+static void
+emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder,
+                              LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+  uint32_t srcLocSize;
+  llvm::Constant *srcLocStr =
+      ompBuilder->getOrCreateSrcLocStr(ompLoc, srcLocSize);
+  llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize);
+  ompBuilder->updateToLocation(ompLoc);
+  llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident);
+  llvm::FunctionCallee fini = ompBuilder->getOrCreateRuntimeFunction(
+      *llvmModule, llvm::omp::OMPRTL___kmpc_task_reduction_modifier_fini);
+  builder.CreateCall(fini,
+                     {ident, gtid, builder.getInt32(isWorksharing ? 1 : 0)});
+}
+
 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
@@ -4334,6 +4419,20 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
                                reductionVariableMap, isByRef, deferredStores)))
     return failure();
 
+  // For `reduction(task, ...)` open a task-reduction scope for the worksharing
+  // loop. Participating explicit tasks accumulate into the per-thread private
+  // copies, which the worksharing reduction then combines across threads.
+  bool isTaskReductionMod =
+      wsloopOp.getReductionMod() == omp::ReductionModifier::task &&
+      wsloopOp.getNumReductionVars() > 0;
+  if (isTaskReductionMod &&
+      !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                 "__omp_taskred_mod_", builder, allocaIP,
+                                 moduleTranslation, /*isModifier=*/true,
+                                 /*isWorksharing=*/true))
+    return wsloopOp.emitError(
+        "failed to emit task reduction modifier initialization");
+
   // TODO: Handle doacross loops when the ordered clause has a parameter.
   bool isOrdered = wsloopOp.getOrdered().has_value();
   std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
@@ -4443,6 +4542,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   // Set the correct branch target for task cancellation
   popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get());
 
+  // Close the task-reduction scope before the worksharing reduction combine.
+  if (isTaskReductionMod)
+    emitTaskReductionModifierFini(/*isWorksharing=*/true, builder,
+                                  moduleTranslation);
+
   // Process the reductions if required.
   if (failed(createReductionsAndCleanup(
           wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls,
@@ -4475,6 +4579,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
   SmallVector<llvm::Value *> privateReductionVariables(
       opInst.getNumReductionVars());
   SmallVector<DeferredStore> deferredStores;
+  // Only open a task-reduction scope when the `task` modifier is present and
+  // there are reduction variables to combine; otherwise the matching fini in
+  // the reduction-combine path (guarded by getNumReductionVars() > 0) would be
+  // skipped, leaving the modifier init unbalanced.
+  bool isTaskReductionMod =
+      opInst.getReductionMod() == omp::ReductionModifier::task &&
+      opInst.getNumReductionVars() > 0;
 
   auto bodyGenCB =
       [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
@@ -4522,6 +4633,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                               reductionVariableMap, isByRef, deferredStores)))
       return llvm::make_error<PreviouslyReportedError>();
 
+    // For `reduction(task, ...)` open a task-reduction scope so participating
+    // explicit tasks accumulate into the per-thread private copies; the
+    // parallel reduction then combines those copies across the team.
+    if (isTaskReductionMod &&
+        !emitTaskReductionInitCall(reductionDecls, privateReductionVariables,
+                                   "__omp_taskred_mod_", builder, allocaIP,
+                                   moduleTranslation, /*isModifier=*/true,
+                                   /*isWorksharing=*/false))
+      return llvm::createStringError(
+          "failed to emit task reduction modifier initialization");
+
     // Save the alloca insertion point on ModuleTranslation stack for use in
     // nested regions.
     LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
@@ -4549,6 +4671,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       // Move to region cont block
       builder.SetInsertPoint((*regionBlock)->getTerminator());
 
+      // Close the task-reduction scope before the per-thread reduction
+      // contributions are combined across the team.
+      if (isTaskReductionMod)
+        emitTaskReductionModifierFini(/*isWorksharing=*/false, builder,
+                                      moduleTranslation);
+
       // Generate reductions from info
       llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
       builder.SetInsertPoint(tempTerminator);
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
new file mode 100644
index 0000000000000..a15b1ee701a4e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir
@@ -0,0 +1,216 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// The `task` reduction modifier opens a task-reduction scope around the
+// parallel / worksharing region. Verify that
+// __kmpc_taskred_modifier_init is emitted (with the correct `is_ws` argument)
+// after the reduction privates are set up, and that
+// __kmpc_task_reduction_modifier_fini is emitted before the reduction combine.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @parallel_task_reduction(%x: !llvm.ptr) {
+  omp.parallel reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK: %kmp_taskred_input_t = type { ptr, ptr, i64, ptr, ptr, ptr, i32 }
+
+// On a parallel construct the modifier init uses is_ws = 0.
+// CHECK-LABEL: define internal void @parallel_task_reduction..omp_par
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 0)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @wsloop_task_reduction(%x: !llvm.ptr) {
+  %lb = llvm.mlir.constant(1 : i32) : i32
+  %ub = llvm.mlir.constant(10 : i32) : i32
+  %step = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) {
+      omp.yield
+    }
+  }
+  llvm.return
+}
+
+// On a worksharing construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @wsloop_task_reduction(
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @sections_task_reduction(%x: !llvm.ptr) {
+  omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.section {
+    ^bb0(%arg: !llvm.ptr):
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+// On a worksharing (sections) construct the modifier init uses is_ws = 1.
+// CHECK-LABEL: define void @sections_task_reduction(
+// CHECK:         %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]])
+// CHECK:         call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1)
+
+// -----
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @parallel_two_task_reductions(%x: !llvm.ptr, %y: !llvm.ptr) {
+  omp.parallel reduction(mod: task, @add_i32 %x -> %p0, @add_i32 %y -> %p1 : !llvm.ptr, !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// With two task-modifier reductions the descriptor array holds two entries and
+// the modifier init receives num = 2 (is_ws = 0 on the parallel construct).
+// CHECK-LABEL: define internal void @parallel_two_task_reductions..omp_par
+// CHECK:         %[[ARR:.+]] = alloca [2 x %kmp_taskred_input_t]
+// CHECK:         call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 2, ptr %[[ARR]])
+
+// -----
+
+// An empty omp.sections (only a terminator, no omp.section) hits the
+// empty-sections early return, so no task-reduction scope is opened: neither
+// the modifier init nor the matching fini may be emitted.
+
+omp.declare_reduction @add_i32 : i32
+init {
+^bb0(%arg0: i32):
+  %c0 = llvm.mlir.constant(0 : i32) : i32
+  omp.yield(%c0 : i32)
+}
+combiner {
+^bb0(%arg0: i32, %arg1: i32):
+  %s = llvm.add %arg0, %arg1 : i32
+  omp.yield(%s : i32)
+}
+
+llvm.func @empty_sections_task_reduction(%x: !llvm.ptr) {
+  omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: define void @empty_sections_task_reduction(
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
+
+// -----
+
+// A verifier-valid omp.parallel that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope.
+
+llvm.func @parallel_task_mod_no_reductions() {
+  "omp.parallel"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+    omp.terminator
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: define internal void @parallel_task_mod_no_reductions..omp_par
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
+
+// -----
+
+// A verifier-valid omp.wsloop that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope.
+
+llvm.func @wsloop_task_mod_no_reductions() {
+  %lb = llvm.mlir.constant(1 : i32) : i32
+  %ub = llvm.mlir.constant(10 : i32) : i32
+  %step = llvm.mlir.constant(1 : i32) : i32
+  "omp.wsloop"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+    "omp.loop_nest"(%lb, %ub, %step) <{loop_inclusive}> ({
+    ^bb0(%iv: i32):
+      "omp.yield"() : () -> ()
+    }) : (i32, i32, i32) -> ()
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: define void @wsloop_task_mod_no_reductions(
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
+
+// -----
+
+// A verifier-valid omp.sections that carries reduction_mod = task but has no
+// reduction variables must not open a task-reduction scope. A section body is
+// present, so this exercises the reduction-count guard rather than the
+// empty-sections early return tested above.
+
+llvm.func @sections_task_mod_no_reductions() {
+  "omp.sections"() <{operandSegmentSizes = array<i32: 0, 0, 0, 0>, reduction_mod = #omp<reduction_modifier(task)>}> ({
+    "omp.section"() ({
+      "omp.terminator"() : () -> ()
+    }) : () -> ()
+    "omp.terminator"() : () -> ()
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: define void @sections_task_mod_no_reductions(
+// CHECK-NOT:     @__kmpc_taskred_modifier_init
+// CHECK-NOT:     @__kmpc_task_reduction_modifier_fini
+// CHECK:         ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir
index 377a5bb799be4..4d23fcafc80bd 100644
--- a/mlir/test/Target/LLVMIR/openmp-todo.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir
@@ -134,6 +134,34 @@ llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) {
 
 // -----
 
+omp.declare_reduction @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+  %2 = llvm.load %arg3 : !llvm.ptr -> f32
+  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+  omp.yield
+}
+llvm.func @parallel_task_reduction_modifier_byref(%x : !llvm.ptr) {
+  // expected-error at below {{not yet implemented: Unhandled clause task reduction modifier with by-ref reduction in omp.parallel operation}}
+  // expected-error at below {{LLVM Translation failed for operation: omp.parallel}}
+  omp.parallel reduction(mod: task, byref @add_f32 %x -> %prv : !llvm.ptr) {
+    omp.terminator
+  }
+  llvm.return
+}
+
+// -----
+
 llvm.func @single_allocate(%x : !llvm.ptr) {
   // expected-error at below {{not yet implemented: Unhandled clause allocate in omp.single operation}}
   // expected-error at below {{LLVM Translation failed for operation: omp.single}}

>From bfac2845fefcfe829df7df4e32a3a3044ee101a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 24 Jun 2026 14:31:14 +0200
Subject: [PATCH 27/42] [NFC][analyzer] Remove the NodeBuilder from
 VisitArrayInitLoopExpr (#204354)

Part of my commit series to eliminate the class `NodeBuilder`.
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 77faa675b90b8..4357f0fae4144 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3190,20 +3190,20 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
 void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex,
                                         ExplodedNode *Pred,
                                         ExplodedNodeSet &Dst) {
+  const Expr *Arr = Ex->getCommonExpr()->getSourceExpr();
+
   ExplodedNodeSet CheckerPreStmt;
   getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, Ex, *this);
 
   ExplodedNodeSet EvalSet;
-  NodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
-
-  const Expr *Arr = Ex->getCommonExpr()->getSourceExpr();
+  if (isa<CXXConstructExpr>(Ex->getSubExpr())) {
+    // The constructor visitor has already handled everything, so let's skip
+    // forward to PostStmt handling by clearing the range of the 'for' loop.
+    EvalSet.insert(CheckerPreStmt);
+    CheckerPreStmt.clear();
+  }
 
   for (auto *Node : CheckerPreStmt) {
-
-    // The constructor visitior has already taken care of everything.
-    if (isa<CXXConstructExpr>(Ex->getSubExpr()))
-      break;
-
     const StackFrame *SF = Node->getStackFrame();
     ProgramStateRef state = Node->getState();
 
@@ -3278,7 +3278,7 @@ void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex,
     else
       Base = UnknownVal();
 
-    Bldr.generateNode(Ex, Node, state->BindExpr(Ex, SF, Base));
+    EvalSet.insert(Engine.makeNodeWithBinding(Node, Ex, Base));
   }
 
   getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, Ex, *this);

>From 30abd9ec2b8d8a5d95da18a0e190fff489604f27 Mon Sep 17 00:00:00 2001
From: Arda Serdar Pektezol <arda at pektezol.dev>
Date: Wed, 24 Jun 2026 15:36:32 +0300
Subject: [PATCH 28/42] [UnifyFunctionExitNodes] Remove the pass (#205519)

The mergereturn pass is not used by anything, so we can go ahead and
delete it.

Related discussion:
https://github.com/llvm/llvm-project/pull/204651#issuecomment-4787636904
---
 llvm/docs/Passes.md                           |  5 -
 .../Transforms/Utils/UnifyFunctionExitNodes.h | 30 ------
 llvm/lib/Passes/PassBuilder.cpp               |  1 -
 llvm/lib/Passes/PassRegistry.def              |  1 -
 llvm/lib/Transforms/Utils/CMakeLists.txt      |  1 -
 .../Utils/UnifyFunctionExitNodes.cpp          | 94 -------------------
 .../unreachable-blocks-status.ll              | 67 -------------
 7 files changed, 199 deletions(-)
 delete mode 100644 llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
 delete mode 100644 llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
 delete mode 100644 llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll

diff --git a/llvm/docs/Passes.md b/llvm/docs/Passes.md
index 28ea11eb994d9..36159ff0c5206 100644
--- a/llvm/docs/Passes.md
+++ b/llvm/docs/Passes.md
@@ -745,11 +745,6 @@ Read
 {doc}`this <MergeFunctions>`
 article for more details.
 
-### `mergereturn`: Unify function exit nodes
-
-Ensure that functions have at most one `ret` instruction in them.
-Additionally, it keeps track of which node is the new exit node of the CFG.
-
 ### `partial-inliner`: Partial Inliner
 
 This pass performs partial inlining, typically by inlining an `if` statement
diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
deleted file mode 100644
index 92df7b480ff5d..0000000000000
--- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- UnifyFunctionExitNodes.h - Ensure fn's have one return --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to ensure that functions have at most one return and one
-// unreachable instruction in them.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
-#define LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
-
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
-
-class UnifyFunctionExitNodesPass
-    : public OptionalPassInfoMixin<UnifyFunctionExitNodesPass> {
-public:
-  LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index de32ed34a3df4..68ea19332ec33 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -391,7 +391,6 @@
 #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include "llvm/Transforms/Utils/TriggerCrashPass.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include "llvm/Transforms/Utils/UnifyLoopExits.h"
 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
 #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 370106e225a9b..84804c3e9d2a8 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -493,7 +493,6 @@ FUNCTION_PASS("mem2reg", PromotePass())
 FUNCTION_PASS("memcpyopt", MemCpyOptPass())
 FUNCTION_PASS("memprof", MemProfilerPass())
 FUNCTION_PASS("mergeicmps", MergeICmpsPass())
-FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
 FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
 FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
 FUNCTION_PASS("newgvn", NewGVNPass())
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 0088d439e6895..ebff5af0dd7f3 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -92,7 +92,6 @@ add_llvm_component_library(LLVMTransformUtils
   StripNonLineTableDebugInfo.cpp
   SymbolRewriter.cpp
   TriggerCrashPass.cpp
-  UnifyFunctionExitNodes.cpp
   UnifyLoopExits.cpp
   Utils.cpp
   ValueMapper.cpp
diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
deleted file mode 100644
index 17fa30e436c2f..0000000000000
--- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to ensure that functions have at most one return and one
-// unreachable instruction in them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Type.h"
-using namespace llvm;
-
-namespace {
-
-bool unifyUnreachableBlocks(Function &F) {
-  std::vector<BasicBlock *> UnreachableBlocks;
-
-  for (BasicBlock &I : F)
-    if (isa<UnreachableInst>(I.getTerminator()))
-      UnreachableBlocks.push_back(&I);
-
-  if (UnreachableBlocks.size() <= 1)
-    return false;
-
-  BasicBlock *UnreachableBlock =
-      BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F);
-  new UnreachableInst(F.getContext(), UnreachableBlock);
-
-  for (BasicBlock *BB : UnreachableBlocks) {
-    BB->back().eraseFromParent(); // Remove the unreachable inst.
-    UncondBrInst::Create(UnreachableBlock, BB);
-  }
-
-  return true;
-}
-
-bool unifyReturnBlocks(Function &F) {
-  std::vector<BasicBlock *> ReturningBlocks;
-
-  for (BasicBlock &I : F)
-    if (isa<ReturnInst>(I.getTerminator()))
-      ReturningBlocks.push_back(&I);
-
-  if (ReturningBlocks.size() <= 1)
-    return false;
-
-  // Insert a new basic block into the function, add PHI nodes (if the function
-  // returns values), and convert all of the return instructions into
-  // unconditional branches.
-  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
-                                               "UnifiedReturnBlock", &F);
-
-  PHINode *PN = nullptr;
-  if (F.getReturnType()->isVoidTy()) {
-    ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
-  } else {
-    // If the function doesn't return void... add a PHI node to the block...
-    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
-                         "UnifiedRetVal");
-    PN->insertInto(NewRetBlock, NewRetBlock->end());
-    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
-  }
-
-  // Loop over all of the blocks, replacing the return instruction with an
-  // unconditional branch.
-  for (BasicBlock *BB : ReturningBlocks) {
-    // Add an incoming element to the PHI node for every return instruction that
-    // is merging into this new block...
-    if (PN)
-      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
-
-    BB->back().eraseFromParent(); // Remove the return insn
-    UncondBrInst::Create(NewRetBlock, BB);
-  }
-
-  return true;
-}
-} // namespace
-
-PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F,
-                                                  FunctionAnalysisManager &AM) {
-  bool Changed = false;
-  Changed |= unifyUnreachableBlocks(F);
-  Changed |= unifyReturnBlocks(F);
-  return Changed ? PreservedAnalyses() : PreservedAnalyses::all();
-}
diff --git a/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll b/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll
deleted file mode 100644
index a6832c79dad43..0000000000000
--- a/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: opt -passes='break-crit-edges,lower-switch,mergereturn' -S < %s | FileCheck %s
-
-; The pass did previously not report the correct Modified status in the case
-; where a function had at most one return block, and an unified unreachable
-; block was created. This was caught by the pass return status check that is
-; hidden under EXPENSIVE_CHECKS.
-
-; CHECK: for.foo.body2:
-; CHECK-NEXT: br label %UnifiedUnreachableBlock
-
-; CHECK: for.foo.end:
-; CHECK-NEXT: br label %UnifiedUnreachableBlock
-
-; CHECK: UnifiedUnreachableBlock:
-; CHECK-NEXT: unreachable
-
-define i32 @foo() {
-entry:
-  br label %for.foo.cond
-
-for.foo.cond:                                         ; preds = %entry
-  br i1 false, label %for.foo.body, label %for.foo.end3
-
-for.foo.body:                                         ; preds = %for.foo.cond
-  br label %for.foo.cond1
-
-for.foo.cond1:                                        ; preds = %for.foo.body
-  br i1 false, label %for.foo.body2, label %for.foo.end
-
-for.foo.body2:                                        ; preds = %for.foo.cond1
-  unreachable
-
-for.foo.end:                                          ; preds = %for.foo.cond1
-  unreachable
-
-for.foo.end3:                                         ; preds = %for.foo.cond
-  ret i32 undef
-}
-
-; CHECK: for.bar.body2:
-; CHECK-NEXT: br label %UnifiedUnreachableBlock
-
-; CHECK: for.bar.end:
-; CHECK-NEXT: br label %UnifiedUnreachableBlock
-
-; CHECK: UnifiedUnreachableBlock:
-; CHECK-NEXT: unreachable
-
-define void @bar() {
-entry:
-  br label %for.bar.cond
-
-for.bar.cond:                                         ; preds = %entry
-  br i1 false, label %for.bar.body, label %for.bar.end
-
-for.bar.body:                                         ; preds = %for.bar.cond
-  br label %for.bar.cond1
-
-for.bar.cond1:                                        ; preds = %for.bar.body
-  br i1 false, label %for.bar.body2, label %for.bar.end
-
-for.bar.body2:                                        ; preds = %for.bar.cond1
-  unreachable
-
-for.bar.end:                                          ; preds = %for.bar.cond1
-  unreachable
-}

>From 977eb3f0ce6c89c0de7cdf82d6ff284cd5667dd5 Mon Sep 17 00:00:00 2001
From: Fady Farag <com.webkit.iidmsa at gmail.com>
Date: Wed, 24 Jun 2026 07:41:36 -0500
Subject: [PATCH 29/42] [clang][test] Use `FileCheck` in
 `Rewriter/objc-modern-getclass-proto.mm` (#204272)

The test had `CHECK` directives that were never executed because no
`RUN` line invoked `FileCheck` on the output.

The test also used a fragile runtime, which invoked the fragile rewriter
instead of the modern one the test was written for.

Switch to a non-fragile runtime so the modern rewriter runs as the test
intended.
---
 clang/test/Rewriter/objc-modern-getclass-proto.mm | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/test/Rewriter/objc-modern-getclass-proto.mm b/clang/test/Rewriter/objc-modern-getclass-proto.mm
index da417477aa12b..81947460b4cfb 100644
--- a/clang/test/Rewriter/objc-modern-getclass-proto.mm
+++ b/clang/test/Rewriter/objc-modern-getclass-proto.mm
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -E %s -o %t.mm
-// RUN: %clang_cc1 -x objective-c++ -fblocks -fms-extensions -rewrite-objc -fobjc-runtime=macosx-fragile-10.5 %t.mm -o %t-rw.cpp
+// RUN: %clang_cc1 -x objective-c++ -fblocks -fms-extensions -rewrite-objc -fobjc-runtime=macosx-10.7 %t.mm -o %t-rw.cpp
+// RUN: FileCheck --input-file=%t-rw.cpp %s
 
 @interface I @end
 @implementation I @end

>From 83d97a53509b74a612148d7d366f8c3359237883 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Wed, 24 Jun 2026 14:48:16 +0200
Subject: [PATCH 30/42] [libc++] Move _LIBCPP_CONCAT{,3} to
 <__configuration/utility.h> (#205533)

The macro is already used in `<__configuration/attributes.h>` and just
happens to work because we include both headers.
`<__configuration/utility.h>` seems like the obvious place to put the
macros.
---
 libcxx/include/__config                  | 5 +----
 libcxx/include/__configuration/utility.h | 4 ++++
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 714cd0fd26b36..802440bae5ef6 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -28,6 +28,7 @@
 #  include <__configuration/language.h>
 #  include <__configuration/namespace.h>
 #  include <__configuration/platform.h>
+#  include <__configuration/utility.h>
 
 // The attributes supported by clang are documented at https://clang.llvm.org/docs/AttributeReference.html
 
@@ -36,10 +37,6 @@
 // defined to XXYYZZ.
 #  define _LIBCPP_VERSION 230000
 
-#  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
-#  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
-#  define _LIBCPP_CONCAT3(X, Y, Z) _LIBCPP_CONCAT(X, _LIBCPP_CONCAT(Y, Z))
-
 #  ifndef __has_constexpr_builtin
 #    define __has_constexpr_builtin(x) 0
 #  endif
diff --git a/libcxx/include/__configuration/utility.h b/libcxx/include/__configuration/utility.h
index 81e91887614d3..2f06af151dd05 100644
--- a/libcxx/include/__configuration/utility.h
+++ b/libcxx/include/__configuration/utility.h
@@ -19,4 +19,8 @@
 #define _LIBCPP_TOSTRING2(x) #x
 #define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
 
+#define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
+#define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
+#define _LIBCPP_CONCAT3(X, Y, Z) _LIBCPP_CONCAT(X, _LIBCPP_CONCAT(Y, Z))
+
 #endif // _LIBCPP___CONFIGURATION_UTILITY_H

>From fd1f64e915edcfa19a9e0f7f4ca4b6a4cc664e92 Mon Sep 17 00:00:00 2001
From: "Ivan R. Ivanov" <iivanov at nvidia.com>
Date: Wed, 24 Jun 2026 14:50:35 +0200
Subject: [PATCH 31/42] [offload][unittest] Set rpath to make sure tests use
 the correct lib (#205542)

Depending on system setup, the unit tests could not find the just built
libLLVMOffload.so. Set the BUILD_RPATH to make sure they correctly use
the library from the current build.
---
 offload/unittests/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index f02571c8d0bc8..b08f7213d5344 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -146,6 +146,7 @@ function(add_offload_unittest test_dirname)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
   target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
+  set_target_properties(${target_name} PROPERTIES BUILD_RPATH "${LIBOMPTARGET_LIBRARY_DIR}")
 endfunction()
 
 function(add_conformance_test test_name)

>From 432f9f8aeadb3819bead82ab26f32ca983f91084 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Wed, 24 Jun 2026 14:52:28 +0200
Subject: [PATCH 32/42] [libc++] Move <features.h> include to
 <__configuration/platform.h> (#205548)

Including `<features.h>` is platform-specific configuration and should
therefore be in `<__configuration/platform.h>`.
---
 libcxx/include/__configuration/platform.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h
index 644fe1724e42e..ebd638e663215 100644
--- a/libcxx/include/__configuration/platform.h
+++ b/libcxx/include/__configuration/platform.h
@@ -30,6 +30,10 @@
 // ... add new file formats here ...
 #endif
 
+#if defined(__MVS__)
+#  include <features.h> // for __NATIVE_ASCII_F
+#endif
+
 // Need to detect which libc we're using if we're on Linux.
 #if (defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)) && __has_include(<features.h>)
 #  include <features.h>

>From 09939ace888b407dc984a7fc6c980fd6064a75c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 24 Jun 2026 14:53:15 +0200
Subject: [PATCH 33/42] [NFC][analyzer] Remove the NodeBuilder from eagerly
 assume (#204371)

Part of my commit series to gradually eliminate the class `NodeBuilder`.
Admittedly this is one of the few places where the implementation with
the `NodeBuilder` is more concise than the new code.

This is caused by two factors:
1. This is an optional step in the analysis, so the "put source nodes in
destination unless we generate a child node from them" behavior of
`NodeBuilder` -- which is often completely useless -- was helpful on two
branches.
2. Making nodes with tags is very rare, so I intentionally did not
include support for tagging in `makeNodeWithBinding` -- but this is one
of the few places where tags are applied.
---
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 4357f0fae4144..cfb294736ee02 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3711,20 +3711,20 @@ REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeExprIfSuccessful,
 void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
                                               ExplodedNodeSet &Src,
                                               const Expr *Ex) {
-  NodeBuilder Bldr(Src, Dst, *currBldrCtx);
-
   for (ExplodedNode *Pred : Src) {
+    const StackFrame *SF = Pred->getStackFrame();
     // Test if the previous node was as the same expression.  This can happen
     // when the expression fails to evaluate to anything meaningful and
     // (as an optimization) we don't generate a node.
     ProgramPoint P = Pred->getLocation();
     if (!P.getAs<PostStmt>() || P.castAs<PostStmt>().getStmt() != Ex) {
+      Dst.insert(Pred);
       continue;
     }
 
     ProgramStateRef State = Pred->getState();
     State = State->set<LastEagerlyAssumeExprIfSuccessful>(nullptr);
-    SVal V = State->getSVal(Ex, Pred->getStackFrame());
+    SVal V = State->getSVal(Ex, SF);
     std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
     if (SEV && SEV->isExpression()) {
       const auto &[TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags();
@@ -3739,16 +3739,20 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
       // First assume that the condition is true.
       if (StateTrue) {
         SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
-        StateTrue = StateTrue->BindExpr(Ex, Pred->getStackFrame(), Val);
-        Bldr.generateNode(Ex, Pred, StateTrue, TrueTag);
+        StateTrue = StateTrue->BindExpr(Ex, SF, Val);
+        PostStmt PostStmtTrue(Ex, SF, TrueTag);
+        Dst.insert(Engine.makeNode(PostStmtTrue, StateTrue, Pred));
       }
 
       // Next, assume that the condition is false.
       if (StateFalse) {
         SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
-        StateFalse = StateFalse->BindExpr(Ex, Pred->getStackFrame(), Val);
-        Bldr.generateNode(Ex, Pred, StateFalse, FalseTag);
+        StateFalse = StateFalse->BindExpr(Ex, SF, Val);
+        PostStmt PostStmtFalse(Ex, SF, FalseTag);
+        Dst.insert(Engine.makeNode(PostStmtFalse, StateFalse, Pred));
       }
+    } else {
+      Dst.insert(Pred);
     }
   }
 }

>From 6b349a9d00b366a90dc9b79e2b5543fc8774b019 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Wed, 24 Jun 2026 14:58:42 +0200
Subject: [PATCH 34/42] [libc++] Remove <features.h> include from <__config>
 (#205549)

The include was moved to `<__configuration/platform.h>` in #205548,
which was also supposed to remove the include in `<__config>`.
---
 libcxx/include/__config | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 802440bae5ef6..fc7a121d52783 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -66,10 +66,6 @@
 #    define _LIBCPP_ABI_VCRUNTIME
 #  endif
 
-#  if defined(__MVS__)
-#    include <features.h> // for __NATIVE_ASCII_F
-#  endif
-
 #  if defined(_WIN32)
 #    define _LIBCPP_WIN32API
 #    define _LIBCPP_SHORT_WCHAR 1

>From 5e165a4896ebde3233e32c6df423c884939f3bd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= <andrzej.warzynski at arm.com>
Date: Wed, 24 Jun 2026 14:08:28 +0100
Subject: [PATCH 35/42] [clang][ARM] Delete dead-code (nfc) (#205404)

Removes dead code that I accidentally introduced in #195825. Thank you
@shafik for pointing this out!
---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index aa32bc2a1d5a7..9c41a807c62cb 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -6548,7 +6548,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     VTy = llvm::FixedVectorType::get(HalfTy, 4);
     llvm::Type *Tys[2] = {Ty, VTy};
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
-    return Builder.CreateTrunc(Ops[0], HalfTy);
   }
   case NEON::BI__builtin_neon_vminnmvq_f16: {
     Int = Intrinsic::aarch64_neon_fminnmv;

>From 4adb32bbca24fbcd247e0725992024d922a94303 Mon Sep 17 00:00:00 2001
From: Tony Guillot <tony.guillot at protonmail.com>
Date: Wed, 24 Jun 2026 15:31:56 +0200
Subject: [PATCH 36/42] [Clang][Docs] Fixed typos of sentinel attribute
 (#205539)

I previously documented the sentinel attribute, but some typos were
missed during the review process.
---
 clang/include/clang/Basic/AttrDocs.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 0f1a66ec34197..7c1c88241aaa8 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -10200,7 +10200,7 @@ call. The attribute accepts two optional arguments: the first argument is the
 position of the expected sentinel value, starting from the last parameter. The
 second argument describes whether the last fixed parameter is treated as a
 valid sentinel value when set to '1'.
-All arguments described above defaults to '0' when elided.
+All arguments described above default to '0' when elided.
 The attribute is also supported with blocks and in Objective-C.
 
 .. code-block:: c
@@ -10214,7 +10214,7 @@ The attribute is also supported with blocks and in Objective-C.
     foo("Another", "example", NULL);
     foo("Missing", "sentinel"); // Not OK
 
-    bar(1, 2, NULL, 3);         // OK: sentinel value at the 2nd to last positon
+    bar(1, 2, NULL, 3);         // OK: sentinel value at the 2nd to last position
     bar(1, 2, 3, nullptr, 4);   // OK: `nullptr` is valid in C23
     bar(1, 2, 3, 4, NULL);      // Not OK
 

>From d089c9add10d5604a397e9af6359a139393420fe Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Jun 2026 14:44:59 +0100
Subject: [PATCH 37/42] [X86] madd.ll - add additional load test for
 matchPMADDWD folds that fail with irregular source types (#205554)

Ensure #205391 doesn't crash with non-pow2/illegal types
---
 llvm/test/CodeGen/X86/madd.ll | 53 +++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 63b390f4b9bdf..5912e6aa3a3c4 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -3901,6 +3901,59 @@ define <4 x i32> @oddvector_sext(<13 x i16> %A) {
    ret <4 x i32> %ret
 }
 
+define <4 x i32> @oddvector_sext_load(ptr %p) {
+; SSE2-LABEL: oddvector_sext_load:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE2-NEXT:    paddd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: oddvector_sext_load:
+; SSE42:       # %bb.0:
+; SSE42-NEXT:    movdqa (%rdi), %xmm1
+; SSE42-NEXT:    pmovsxwd %xmm1, %xmm0
+; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; SSE42-NEXT:    pmovsxwd %xmm1, %xmm1
+; SSE42-NEXT:    phaddd %xmm1, %xmm0
+; SSE42-NEXT:    retq
+;
+; AVX1-LABEL: oddvector_sext_load:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpmovsxwd 8(%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm1
+; AVX1-NEXT:    vphaddd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: oddvector_sext_load:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovsxwd (%rdi), %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: oddvector_sext_load:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovsxwd (%rdi), %zmm0
+; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+   %A = load <13 x i16>, ptr %p
+   %a = sext <13 x i16> %A to <13 x i32>
+   %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+   %even = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+   %ret = add <4 x i32> %odd, %even
+   ret <4 x i32> %ret
+}
+
 define <3 x i32> @oddvector_shl(<12 x i16> %A) {
 ; SSE2-LABEL: oddvector_shl:
 ; SSE2:       # %bb.0:

>From e68e8d35c91b4fd3ba0ae3ef12d79b41d92580b2 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Wed, 24 Jun 2026 08:51:36 -0500
Subject: [PATCH 38/42] [flang][OpenMP] Parsing and semantics of locators as
 part of OmpObject (#203910)

Allow function call references and reserved locator names as parts of
OmpObject. Function calls and array element accesses have the same
syntax, and the OmpObject parser will parse them as function calls. This
is then corrected (if needed) immediately after the name resolution is
complete.

There are no clause-specific semantic checks of proper locators.
Existing code will check if a proper locator is specified on a clause
that allows it.

Lowering of proper locators to MLIR is not implemented, and a TODO
message is emitted.
---
 flang/include/flang/Parser/dump-parse-tree.h  |  2 +-
 flang/include/flang/Parser/parse-tree.h       | 23 +++++----
 flang/include/flang/Semantics/expression.h    |  4 +-
 flang/include/flang/Semantics/symbol.h        |  4 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    | 22 ++++++++
 flang/lib/Lower/OpenMP/Clauses.cpp            | 51 +++++++++++++++----
 flang/lib/Parser/openmp-parsers.cpp           | 37 +++++++++-----
 flang/lib/Parser/openmp-utils.cpp             | 12 +++--
 flang/lib/Parser/unparse.cpp                  |  2 +-
 flang/lib/Semantics/check-omp-loop.cpp        |  2 +-
 flang/lib/Semantics/check-omp-structure.cpp   | 28 ++++++++--
 flang/lib/Semantics/check-omp-structure.h     |  1 +
 flang/lib/Semantics/check-omp-variant.cpp     |  2 +-
 flang/lib/Semantics/openmp-utils.cpp          | 48 ++++++++++++++---
 flang/lib/Semantics/resolve-directives.cpp    |  3 ++
 flang/lib/Semantics/resolve-names.cpp         | 16 +++++-
 flang/lib/Semantics/rewrite-parse-tree.cpp    | 17 +++++++
 .../OpenMP/Todo/locator-call-affinity.f90     | 13 +++++
 .../Lower/OpenMP/Todo/locator-call-from.f90   | 12 +++++
 .../Lower/OpenMP/Todo/locator-call-map.f90    | 13 +++++
 .../Lower/OpenMP/Todo/locator-call-to.f90     | 12 +++++
 .../Lower/OpenMP/Todo/locator-reserved.f90    | 11 ++++
 .../Parser/OpenMP/allocate-align-tree.f90     |  4 +-
 .../Parser/OpenMP/allocate-tree-spec-part.f90 |  8 +--
 flang/test/Parser/OpenMP/allocate-tree.f90    |  6 +--
 flang/test/Parser/OpenMP/declare-variant.f90  |  8 +--
 flang/test/Parser/OpenMP/depobj-construct.f90 |  8 +--
 flang/test/Parser/OpenMP/groupprivate.f90     |  6 +--
 .../Parser/OpenMP/metadirective-dirspec.f90   | 14 ++---
 .../Parser/OpenMP/metadirective-flush.f90     |  4 +-
 .../OpenMP/openmp6-directive-spellings.f90    |  2 +-
 flang/test/Parser/OpenMP/threadprivate.f90    |  4 +-
 .../Semantics/OpenMP/affinity-invalid.f90     |  8 ---
 .../Semantics/OpenMP/depend-substring.f90     |  9 ----
 .../Semantics/OpenMP/reserved-locator.f90     |  6 +++
 llvm/include/llvm/Frontend/OpenMP/OMP.h       |  2 +
 llvm/lib/Frontend/OpenMP/OMP.cpp              |  6 +++
 37 files changed, 320 insertions(+), 110 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90
 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-from.f90
 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-map.f90
 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-to.f90
 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-reserved.f90
 create mode 100644 flang/test/Semantics/OpenMP/reserved-locator.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 1205101c21fcf..ceba23d7d4706 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -667,7 +667,6 @@ class ParseTreeDumper {
   NODE(parser, OmpLinearModifier)
   NODE_ENUM(OmpLinearModifier, Value)
   NODE(parser, OmpLocator)
-  NODE(parser, OmpLocatorList)
   NODE(parser, OmpLooprangeClause)
   NODE(parser, OmpLowerBound)
   NODE(parser, OmpMapClause)
@@ -722,6 +721,7 @@ class ParseTreeDumper {
   NODE_ENUM(OmpRefModifier, Value)
   NODE(parser, OmpReplayableClause)
   NODE(parser, OmpRequiresDirective)
+  NODE(parser, OmpReservedIdentifier)
   NODE(parser, OmpReverseOffloadClause)
   NODE(parser, OmpScheduleClause)
   NODE(OmpScheduleClause, Modifier)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index ee6288539395c..ea4ce1882eb1b 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3599,6 +3599,16 @@ struct OmpTypeNameList {
   WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list<OmpTypeName>);
 };
 
+struct OmpReservedIdentifier {
+  WRAPPER_CLASS_BOILERPLATE(OmpReservedIdentifier, Name);
+};
+
+// "Proper" locator, i.e. a function reference or a reserved locator.
+struct OmpLocator {
+  UNION_CLASS_BOILERPLATE(OmpLocator);
+  std::variant<FunctionReference, OmpReservedIdentifier> u;
+};
+
 // 2.1 Directives or clauses may accept a list or extended-list.
 //     A list item is a variable, array section or common block name (enclosed
 //     in slashes). An extended list item is a list item or a procedure Name.
@@ -3612,7 +3622,7 @@ struct OmpObject {
     CharBlock source;
   };
   UNION_CLASS_BOILERPLATE(OmpObject);
-  std::variant<Designator, /*common block*/ Name, Invalid> u;
+  std::variant<Designator, OmpLocator, Name, Invalid> u;
 };
 
 struct OmpObjectList {
@@ -3688,15 +3698,6 @@ struct OmpInitializerExpression : public OmpStylizedExpression {
 };
 
 inline namespace arguments {
-struct OmpLocator {
-  UNION_CLASS_BOILERPLATE(OmpLocator);
-  std::variant<OmpObject, FunctionReference> u;
-};
-
-struct OmpLocatorList {
-  WRAPPER_CLASS_BOILERPLATE(OmpLocatorList, std::list<OmpLocator>);
-};
-
 // Ref: [4.5:58-60], [5.0:58-60], [5.1:63-68], [5.2:197-198], [6.0:334-336]
 //
 // Argument to DECLARE VARIANT with the base-name present. (When only
@@ -3737,7 +3738,7 @@ struct OmpReductionSpecifier {
 struct OmpArgument {
   CharBlock source;
   UNION_CLASS_BOILERPLATE(OmpArgument);
-  std::variant<OmpLocator, // {variable, extended, locator}-list-item
+  std::variant<OmpObject,
       OmpBaseVariantNames, // base-name:variant-name
       OmpMapperSpecifier, OmpReductionSpecifier>
       u;
diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h
index 598cb31e851f8..75468d683af48 100644
--- a/flang/include/flang/Semantics/expression.h
+++ b/flang/include/flang/Semantics/expression.h
@@ -252,6 +252,8 @@ class ExpressionAnalyzer {
   MaybeExpr Analyze(const parser::InitialDataTarget &);
   MaybeExpr Analyze(const parser::NullInit &);
   MaybeExpr Analyze(const parser::StmtFunctionStmt &);
+  MaybeExpr Analyze(const parser::FunctionReference &,
+      std::optional<parser::StructureConstructor> * = nullptr);
 
   void Analyze(const parser::CallStmt &);
   const Assignment *Analyze(const parser::AssignmentStmt &);
@@ -293,8 +295,6 @@ class ExpressionAnalyzer {
   MaybeExpr Analyze(const parser::CharLiteralConstantSubstring &);
   MaybeExpr Analyze(const parser::SubstringInquiry &);
   MaybeExpr Analyze(const parser::ArrayConstructor &);
-  MaybeExpr Analyze(const parser::FunctionReference &,
-      std::optional<parser::StructureConstructor> * = nullptr);
   MaybeExpr Analyze(const parser::Expr::Parentheses &);
   MaybeExpr Analyze(const parser::Expr::UnaryPlus &);
   MaybeExpr Analyze(const parser::Expr::Negate &);
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 23c26ba733e86..e0511b5b4803d 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -925,8 +925,8 @@ class Symbol {
       // OpenMP special variables
       OmpInVar, OmpOrigVar, OmpOutVar, OmpPrivVar,
       // OpenMP miscellaneous flags
-      OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned, OmpNontemporal,
-      OmpAllocate, OmpDeclarativeAllocateDirective,
+      OmpReserved, OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned,
+      OmpNontemporal, OmpAllocate, OmpDeclarativeAllocateDirective,
       OmpExecutableAllocateDirective, OmpDeclareSimd, OmpDeclareTarget,
       OmpThreadprivate, OmpDeclareReduction, OmpFlushed, OmpCriticalLock,
       OmpIfSpecified, OmpNone, OmpPreDetermined, OmpExplicit, OmpImplicit,
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 4f19dfb98024d..f1ccb64e3dfb3 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -29,6 +29,24 @@ namespace Fortran {
 namespace lower {
 namespace omp {
 
+static void TodoLocators(mlir::Location loc, const omp::ObjectList &objects) {
+  for (const omp::Object &object : objects) {
+    if (auto &ref = object.ref()) {
+      auto op = GetTopLevelOperation(*ref).first;
+      if (op == evaluate::operation::Operator::Call)
+        TODO(loc, "Function call locators are not supported yet");
+    }
+    semantics::Symbol *symbol = object.sym();
+    if (symbol->test(semantics::Symbol::Flag::OmpReserved)) {
+      std::string name =
+          parser::ToLowerCaseLetters(object.sym()->name().ToString());
+      if (llvm::is_contained(llvm::omp::getReservedLocatorNames(), name)) {
+        TODO(loc, "Reserved locators are not supported yet");
+      }
+    }
+  }
+}
+
 using ReductionModifier =
     Fortran::lower::omp::clause::Reduction::ReductionModifier;
 
@@ -971,6 +989,8 @@ bool ClauseProcessor::processAffinity(
             std::get<std::optional<omp::clause::Iterator>>(clause.t);
         collectIteratorIVs(clause, converter, stmtCtx, iteratorRanges, ivSyms);
 
+        TodoLocators(clauseLocation, objects);
+
         for (const omp::Object &object : objects) {
           llvm::SmallVector<mlir::Value> bounds;
           std::stringstream asFortran;
@@ -1972,6 +1992,7 @@ bool ClauseProcessor::processMap(
     if (iterator)
       TODO(currentLocation,
            "Support for iterator modifiers is not implemented yet");
+    TodoLocators(currentLocation, objects);
 
     processMapObjects(stmtCtx, clauseLocation,
                       std::get<omp::ObjectList>(clause.t), mapTypeBits,
@@ -2007,6 +2028,7 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx,
 
     if (iterator)
       TODO(clauseLocation, "Iterator modifier is not supported yet");
+    TodoLocators(clauseLocation, objects);
 
     processMapObjects(stmtCtx, clauseLocation, objects, mapTypeBits,
                       parentMemberIndices, result.mapVars, mapObjects,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index 03852dc9e7d74..34dc6ed56b435 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -57,6 +57,12 @@ struct SymbolAndDesignatorExtractor {
                            evaluate::AsGenericExpr(AsRvalueRef(e)));
   }
 
+  template <typename T>
+  static SymbolWithDesignator visit(const evaluate::FunctionRef<T> &e) {
+    return std::make_tuple(symbol_addr(*e.proc().GetSymbol()),
+                           evaluate::AsGenericExpr(AsRvalueRef(e)));
+  }
+
   static SymbolWithDesignator visit(const evaluate::ProcedureDesignator &e) {
     return std::make_tuple(symbol_addr(*e.GetSymbol()), std::nullopt);
   }
@@ -78,12 +84,16 @@ struct SymbolAndDesignatorExtractor {
     if (maybeRef) {
       if (&maybeRef->GetLastSymbol() == symbol)
         return; // Symbol with a designator for it -> OK
-      llvm_unreachable("Expecting designator for given symbol");
+      llvm_unreachable("Symbol mismatch");
+    } else if (auto *ref = evaluate::UnwrapProcedureRef(*maybeDsg)) {
+      if (ref->proc().GetSymbol() == symbol)
+        return;
+      llvm_unreachable("Symbol mismatch");
     } else {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
       maybeDsg->dump();
 #endif
-      llvm_unreachable("Expecting DataRef designator");
+      llvm_unreachable("Unexpected expression");
     }
   }
 };
@@ -110,6 +120,14 @@ Object makeObject(const parser::Designator &dsg,
   return Object{std::get<0>(sd), std::move(std::get<1>(sd))};
 }
 
+Object makeObject(const parser::FunctionReference &ref,
+                  semantics::SemanticsContext &semaCtx) {
+  evaluate::ExpressionAnalyzer ea{semaCtx};
+  SymbolWithDesignator sd = getSymbolAndDesignator(ea.Analyze(ref));
+  SymbolAndDesignatorExtractor::verify(sd);
+  return Object{std::get<0>(sd), std::move(std::get<1>(sd))};
+}
+
 Object makeObject(const parser::StructureComponent &comp,
                   semantics::SemanticsContext &semaCtx) {
   evaluate::ExpressionAnalyzer ea{semaCtx};
@@ -127,8 +145,24 @@ Object makeObject(const parser::OmpObject &object,
     assert(name->symbol && "Expecting Symbol");
     return Object{name->symbol, std::nullopt};
   }
-  // OmpObject is std::variant<Designator, /*common block*/ Name>;
-  return makeObject(std::get<parser::Designator>(object.u), semaCtx);
+  assert(!std::holds_alternative<parser::OmpObject::Invalid>(object.u) &&
+         "Invalid object should have been caught in semantics");
+  // OmpObject is std::variant<Designator, OmpLocator, Name, Invalid>;
+  if (auto *desg = std::get_if<parser::Designator>(&object.u))
+    return makeObject(*desg, semaCtx);
+  if (auto *locator = std::get_if<parser::OmpLocator>(&object.u)) {
+    return common::visit( //
+        common::visitors{
+            [&](const parser::OmpReservedIdentifier &x) {
+              return makeObject(x.v, semaCtx);
+            },
+            [&](const parser::FunctionReference &x) {
+              return makeObject(x, semaCtx);
+            },
+        },
+        locator->u);
+  }
+  llvm_unreachable("Unexpected OmpObject");
 }
 
 Object makeObject(const parser::EntityDecl &decl,
@@ -139,13 +173,10 @@ Object makeObject(const parser::EntityDecl &decl,
 ObjectList makeObjects(const parser::OmpArgumentList &objects,
                        semantics::SemanticsContext &semaCtx) {
   return makeList(objects.v, [&](const parser::OmpArgument &arg) {
-    return common::visit(
+    return common::visit( //
         common::visitors{
-            [&](const parser::OmpLocator &locator) -> Object {
-              if (auto *object = std::get_if<parser::OmpObject>(&locator.u)) {
-                return makeObject(*object, semaCtx);
-              }
-              llvm_unreachable("Expecting object");
+            [&](const parser::OmpObject &object) -> Object {
+              return makeObject(object, semaCtx);
             },
             [](auto &&s) -> Object { //
               llvm_unreachable("Expecting object");
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 7016c688a572d..8a18bcc9e4485 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -281,6 +281,27 @@ TYPE_PARSER(construct<common::OmpMemoryOrderType>(
     "RELEASE" >> pure(common::OmpMemoryOrderType::Release) ||
     "SEQ_CST" >> pure(common::OmpMemoryOrderType::Seq_Cst)))
 
+static bool IsReservedName(const Name &name) {
+  llvm::StringRef s(name.source.begin(), name.source.size());
+  return s.starts_with_insensitive("OMP_") ||
+      s.starts_with_insensitive("OMPX_");
+}
+
+TYPE_PARSER( //
+    construct<OmpReservedIdentifier>(predicated(name, IsReservedName)))
+
+// Parse x(...)(...) as a substring instead of a function reference.
+TYPE_PARSER( //
+    construct<OmpLocator>(functionReference / !lookAhead("("_tok)) ||
+    construct<OmpLocator>(Parser<OmpReservedIdentifier>{}))
+
+TYPE_PARSER( //
+    construct<OmpObject>(Parser<OmpLocator>{}) ||
+    construct<OmpObject>(designator) ||
+    "/" >> construct<OmpObject>(name) / "/" ||
+    construct<OmpObject>(sourced(construct<OmpObject::Invalid>(
+        "//"_tok >> pure(OmpObject::Invalid::Kind::BlankCommonBlock)))))
+
 // --- Modifier helpers -----------------------------------------------
 
 template <typename Clause, typename Separator> struct ModifierList {
@@ -588,10 +609,6 @@ TYPE_PARSER( //
 // At the moment these are only directive arguments. This is needed for
 // parsing directive-specification.
 
-TYPE_PARSER( //
-    construct<OmpLocator>(Parser<OmpObject>{}) ||
-    construct<OmpLocator>(Parser<FunctionReference>{}))
-
 TYPE_PARSER(construct<OmpBaseVariantNames>(
     Parser<OmpObject>{} / ":", Parser<OmpObject>{}))
 
@@ -610,7 +627,7 @@ struct OmpArgumentParser {
         construct<OmpArgument>(Parser<OmpMapperSpecifier>{}),
         // By default, prefer OmpReductionSpecifier over OmpBaseVariantNames.
         construct<OmpArgument>(Parser<OmpReductionSpecifier>{}),
-        construct<OmpArgument>(Parser<OmpLocator>{})))};
+        construct<OmpArgument>(Parser<OmpObject>{})))};
     return parser.Parse(state);
   }
 };
@@ -625,13 +642,11 @@ struct OmpArgumentParser<llvm::omp::Directive::OMPD_declare_variant> {
         // In DECLARE_VARIANT parse OmpBaseVariantNames instead of
         // OmpReductionSpecifier.
         construct<OmpArgument>(Parser<OmpBaseVariantNames>{}),
-        construct<OmpArgument>(Parser<OmpLocator>{})))};
+        construct<OmpArgument>(Parser<OmpObject>{})))};
     return parser.Parse(state);
   }
 };
 
-TYPE_PARSER(construct<OmpLocatorList>(nonemptyList(Parser<OmpLocator>{})))
-
 template <llvm::omp::Directive Id = llvm::omp::Directive::OMPD_unknown>
 struct OmpArgumentListParser {
   using resultType = OmpArgumentList;
@@ -1405,12 +1420,6 @@ TYPE_PARSER(construct<OmpNumThreadsClause>(
     maybe(nonemptyList(Parser<OmpNumThreadsClause::Modifier>{}) / ":"),
     nonemptyList(scalarIntExpr)))
 
-TYPE_PARSER( //
-    construct<OmpObject>(designator) ||
-    "/" >> construct<OmpObject>(name) / "/" ||
-    construct<OmpObject>(sourced(construct<OmpObject::Invalid>(
-        "//"_tok >> pure(OmpObject::Invalid::Kind::BlankCommonBlock)))))
-
 // OMP 5.0 2.19.4.5 LASTPRIVATE ([lastprivate-modifier :] list)
 TYPE_PARSER(construct<OmpLastprivateClause>(
     maybe(nonemptyList(Parser<OmpLastprivateClause::Modifier>{}) / ":"),
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index ec312bcf3ebfd..ca563f2df892d 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -54,16 +54,20 @@ std::optional<parser::CharBlock> GetObjectSource(
     return name->source;
   } else if (auto *desg{std::get_if<parser::Designator>(&object.u)}) {
     return GetLastName(*desg).source;
+  } else if (auto *locator{std::get_if<parser::OmpLocator>(&object.u)}) {
+    return common::visit( //
+        common::visitors{
+            [](const parser::OmpReservedIdentifier &x) { return x.v.source; },
+            [](const parser::FunctionReference &x) { return x.source; },
+        },
+        locator->u);
   }
   return std::nullopt;
 }
 
 const parser::OmpObject *GetArgumentObject(
     const parser::OmpArgument &argument) {
-  if (auto *locator{std::get_if<parser::OmpLocator>(&argument.u)}) {
-    return std::get_if<parser::OmpObject>(&locator->u);
-  }
-  return nullptr;
+  return std::get_if<parser::OmpObject>(&argument.u);
 }
 
 namespace detail {
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 42f042e470e81..7d0038767a9c4 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2515,7 +2515,7 @@ class UnparseVisitor {
   void Unparse(const OmpObject &x) {
     common::visit( //
         common::visitors{
-            [&](const Designator &y) { Walk(y); },
+            [&](const auto &y) { Walk(y); },
             [&](const Name &y) {
               Put("/");
               Walk(y);
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index 3e1f2e6cdf5d0..c77c1c53f4813 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -665,7 +665,7 @@ void OmpStructureChecker::CheckScanModifier(
               [&](const parser::Name &name) {
                 checkReductionSymbolInScan(name);
               },
-              [&](const parser::OmpObject::Invalid &invalid) {},
+              [&](const auto &) {},
           },
           ompObj.u);
     }
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 816b8fd2f149d..81600fa1ddbb9 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -414,12 +414,18 @@ void OmpStructureChecker::AnalyzeObject(const parser::OmpObject &object) {
       }
     }
   }
+
   evaluate::ExpressionAnalyzer ea{context_};
   auto restore{ea.AllowWholeAssumedSizeArray(true)};
   common::visit( //
       common::visitors{
           [&](auto &&s) { ea.Analyze(s); },
-          [&](const parser::OmpObject::Invalid &invalid) {},
+          [&](const parser::OmpLocator &x) {
+            if (auto *ref{std::get_if<parser::FunctionReference>(&x.u)}) {
+              ea.Analyze(*ref);
+            }
+          },
+          [&](const parser::OmpObject::Invalid &) {},
       },
       object.u);
 }
@@ -616,6 +622,16 @@ bool OmpStructureChecker::HasRequires(llvm::omp::Clause req) {
       DEREF(unit.symbol()).details());
 }
 
+void OmpStructureChecker::Enter(const parser::OmpLocator &x) {
+  if (auto *reserved{parser::Unwrap<parser::OmpReservedIdentifier>(x.u)}) {
+    std::string name{parser::ToLowerCaseLetters(reserved->v.source.ToString())};
+    if (!llvm::is_contained(llvm::omp::getReservedLocatorNames(), name)) {
+      context_.Say(reserved->v.source, "'%s' is not a valid locator"_err_en_US,
+          parser::ToUpperCaseLetters(name));
+    }
+  }
+}
+
 void OmpStructureChecker::CheckArgumentObjectKind(const parser::OmpClause &x) {
   unsigned version{context_.langOptions().OpenMPVersion};
   llvm::omp::Directive dirId{GetContext().directive};
@@ -1671,7 +1687,8 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
   common::visit( //
       common::visitors{
           [&](auto &&s) { CheckThreadprivateOrDeclareTargetVar(s); },
-          [&](const parser::OmpObject::Invalid &invalid) {},
+          [&](const parser::OmpLocator &) {},
+          [&](const parser::OmpObject::Invalid &) {},
       },
       object.u);
 }
@@ -1685,10 +1702,10 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar(
 
 void OmpStructureChecker::Enter(const parser::OmpGroupprivateDirective &x) {
   for (const parser::OmpArgument &arg : x.v.Arguments().v) {
-    auto *locator{std::get_if<parser::OmpLocator>(&arg.u)};
+    auto *object{std::get_if<parser::OmpObject>(&arg.u)};
     const Symbol *sym{GetArgumentSymbol(arg, /*ultimate=*/true)};
 
-    if (!locator || !sym ||
+    if (!object || !sym ||
         (!IsVariableListItem(*sym) && !IsCommonBlock(*sym))) {
       context_.Say(arg.source,
           "GROUPPRIVATE argument should be a variable or a named common block"_err_en_US);
@@ -3557,7 +3574,8 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &x) {
                       }
                     }
                   },
-                  [&](const parser::OmpObject::Invalid &invalid) {},
+                  [&](const parser::OmpLocator &) {},
+                  [&](const parser::OmpObject::Invalid &) {},
               },
               ompObject.u);
         }
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index 8d9b25fb2a11d..4499e2a213384 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -131,6 +131,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase {
   void Enter(const parser::OpenMPCriticalConstruct &);
   void Enter(const parser::OpenMPAtomicConstruct &);
 
+  void Enter(const parser::OmpLocator &x);
   void Enter(const parser::OmpClauseList &);
   void Leave(const parser::OmpClauseList &);
   void Enter(const parser::OmpClause &);
diff --git a/flang/lib/Semantics/check-omp-variant.cpp b/flang/lib/Semantics/check-omp-variant.cpp
index c681cd601b856..8b782030c37e3 100644
--- a/flang/lib/Semantics/check-omp-variant.cpp
+++ b/flang/lib/Semantics/check-omp-variant.cpp
@@ -688,7 +688,7 @@ void OmpStructureChecker::CheckOmpDeclareVariantDirective(
             CheckProcedureSymbol(base, arg.source);
             CheckProcedureSymbol(variant, arg.source);
           },
-          [&](const parser::OmpLocator &y) {
+          [&](const parser::OmpObject &y) {
             variant = GetArgumentSymbol(arg);
             CheckProcedureSymbol(variant, arg.source);
             const Scope &containingScope{context_.FindScope(x.source)};
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index 1aa27a5fe6074..0556920877f45 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -121,6 +121,21 @@ std::string TryVersion(unsigned version) {
   return "try -fopenmp-version=" + std::to_string(version);
 }
 
+static const Symbol *GetFunctionReferenceSymbol(
+    const parser::FunctionReference &ref) {
+  auto &proc{std::get<parser::ProcedureDesignator>(ref.v.t)};
+  return common::visit(
+      common::visitors{
+          [](const parser::Name &x) { return x.symbol; },
+          [](const parser::ProcComponentRef &x) {
+            return parser::UnwrapRef<parser::StructureComponent>(x.v)
+                .Component()
+                .symbol;
+          },
+      },
+      proc.u);
+}
+
 const Symbol *GetObjectSymbol(const parser::OmpObject &object, bool ultimate) {
   // Some symbols may be missing if the resolution failed, e.g. when an
   // undeclared name is used with implicit none.
@@ -137,16 +152,28 @@ const Symbol *GetObjectSymbol(const parser::OmpObject &object, bool ultimate) {
     } else {
       return last.symbol;
     }
+  } else if (auto *locator{std::get_if<parser::OmpLocator>(&object.u)}) {
+    const Symbol *sym = common::visit( //
+        common::visitors{
+            [](const parser::OmpReservedIdentifier &x) { return x.v.symbol; },
+            [](const parser::FunctionReference &x) {
+              return GetFunctionReferenceSymbol(x);
+            },
+        },
+        locator->u);
+    if (sym && ultimate) {
+      return &sym->GetUltimate();
+    } else {
+      return sym;
+    }
   }
   return nullptr;
 }
 
 const Symbol *GetArgumentSymbol(
     const parser::OmpArgument &argument, bool ultimate) {
-  if (auto *locator{std::get_if<parser::OmpLocator>(&argument.u)}) {
-    if (auto *object{std::get_if<parser::OmpObject>(&locator->u)}) {
-      return GetObjectSymbol(*object, ultimate);
-    }
+  if (auto *object{GetArgumentObject(argument)}) {
+    return GetObjectSymbol(*object, ultimate);
   }
   return nullptr;
 }
@@ -233,17 +260,21 @@ bool IsExtendedListItem(
   if (IsVariableListItem(object, semaCtx)) {
     return true;
   }
-  if (auto *sym{GetObjectSymbol(object, /*ultimate=*/true)}) {
-    return IsProcedure(*sym);
+  if (!std::holds_alternative<parser::OmpLocator>(object.u)) {
+    if (auto *sym{GetObjectSymbol(object, /*ultimate=*/true)}) {
+      return IsProcedure(*sym);
+    }
   }
   return false;
 }
 
 bool IsLocatorListItem(
     const parser::OmpObject &object, SemanticsContext *semaCtx) {
-  if (IsVariableListItem(object, semaCtx)) {
+  if (IsVariableListItem(object, semaCtx) ||
+      std::holds_alternative<parser::OmpLocator>(object.u)) {
     return true;
   }
+  // A statement function call may look like an array element access.
   if (auto *desg{parser::Unwrap<parser::Designator>(object)}) {
     evaluate::ExpressionAnalyzer ea(*semaCtx);
     auto restorer{ea.GetContextualMessages().DiscardMessages()};
@@ -447,6 +478,9 @@ std::optional<bool> IsContiguous(
             }
             return std::optional<bool>{};
           },
+          [&](const parser::OmpLocator &) { //
+            return std::optional<bool>{};
+          },
           [&](const parser::OmpObject::Invalid &) {
             return std::optional<bool>{};
           }},
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 2e1a1cbe01aef..f3865cfb877dc 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -3176,6 +3176,9 @@ void OmpAttributeVisitor::ResolveOmpObject(
           [&](const parser::Name &name) { // common block
             ResolveOmpCommonBlock(name, ompFlag);
           },
+          [&](const parser::OmpLocator &ref) {
+            // Do nothing here.
+          },
           [&](const parser::OmpObject::Invalid &invalid) {
             switch (invalid.v) {
               SWITCH_COVERS_ALL_CASES
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index c6800e15be9dc..a6f3fe12eb9b7 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1738,6 +1738,7 @@ class OmpVisitor : public virtual DeclarationVisitor {
     return Pre(static_cast<const parser::OmpDirectiveSpecification &>(x));
   }
 
+  bool Pre(const parser::OmpReservedIdentifier &);
   void Post(const parser::OmpTypeName &);
   bool Pre(const parser::OmpStylizedDeclaration &);
   void Post(const parser::OmpStylizedDeclaration &);
@@ -1938,6 +1939,17 @@ void OmpVisitor::PushScopeWithSource(
   currScope().AddSourceRange(source);
 }
 
+bool OmpVisitor::Pre(const parser::OmpReservedIdentifier &x) {
+  // Create a unique symbol in the global scope.
+  if (auto *symbol{context().globalScope().FindSymbol(x.v.source)}) {
+    x.v.symbol = symbol;
+  } else {
+    MakePlaceholder(x.v, MiscDetails::Kind::None);
+  }
+  x.v.symbol->set(Symbol::Flag::OmpReserved);
+  return false;
+}
+
 void OmpVisitor::Post(const parser::OmpTypeName &x) {
   x.declTypeSpec = GetDeclTypeSpec();
 }
@@ -2205,7 +2217,7 @@ bool OmpVisitor::Pre(const parser::OmpDirectiveSpecification &x) {
               Walk(std::get<0>(names.t));
               Walk(std::get<1>(names.t));
             },
-            [&](const parser::OmpLocator &locator) {
+            [&](const parser::OmpObject &object) {
               // Manually resolve names in CRITICAL directives. This is because
               // these names do not denote Fortran objects, and the CRITICAL
               // directive causes them to be "auto-declared", i.e. inserted into
@@ -2215,7 +2227,7 @@ bool OmpVisitor::Pre(const parser::OmpDirectiveSpecification &x) {
               if (x.DirId() == llvm::omp::Directive::OMPD_critical) {
                 ResolveCriticalName(arg);
               } else {
-                Walk(locator);
+                Walk(object);
               }
             },
         },
diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp
index 4e1c9bae9c153..bdccde8af3b94 100644
--- a/flang/lib/Semantics/rewrite-parse-tree.cpp
+++ b/flang/lib/Semantics/rewrite-parse-tree.cpp
@@ -82,6 +82,7 @@ class RewriteMutator {
   bool Pre(parser::EndSubroutineStmt &) { return false; }
   bool Pre(parser::EndTypeStmt &) { return false; }
 
+  bool Pre(parser::OmpObject &);
   bool Pre(parser::OmpBlockConstruct &);
   bool Pre(parser::OpenMPLoopConstruct &);
   void Post(parser::OmpBlockConstruct &);
@@ -371,6 +372,22 @@ bool RewriteMutator::Pre(parser::Block &block) {
 
 void RewriteMutator::Post(parser::Block &block) { this->Pre(block); }
 
+bool RewriteMutator::Pre(parser::OmpObject &object) {
+  // When parsing A(i) there is no way to tell whether it's a function call
+  // or an array element access. In OmpObject it will be preferentially
+  // parsed as FunctionReference, but once the name "A" is resolved, and it
+  // turns out to be an array, the function call in the OmpObject will need
+  // to be converted to an array element.
+  // This has to happen early, before the ExprChecker runs, or otherwise it
+  // will emit undesirable diagnostics.
+  if (auto *ref{parser::Unwrap<parser::FunctionReference>(object)}) {
+    if (CheckMisparsedArrayElement(context_, *ref)) {
+      object.u = ref->ConvertToArrayElementRef();
+    }
+  }
+  return true;
+}
+
 bool RewriteMutator::Pre(parser::OmpBlockConstruct &block) {
   if (context_.langOptions().OpenMPSimd) {
     auto &innerBlock = std::get<parser::Block>(block.t);
diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90
new file mode 100644
index 0000000000000..2f17e6c22c932
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90
@@ -0,0 +1,13 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: Function call locators are not supported yet
+
+subroutine f
+  interface
+    function p
+      integer, pointer :: p
+    end
+  end interface
+  !$omp task affinity(p())
+  !$omp end task
+end
diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-from.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-from.f90
new file mode 100644
index 0000000000000..66e6a5eb1a147
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/locator-call-from.f90
@@ -0,0 +1,12 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: Function call locators are not supported yet
+
+subroutine f
+  interface
+    function p
+      integer, pointer :: p
+    end
+  end interface
+  !$omp target update from(p())
+end
diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-map.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-map.f90
new file mode 100644
index 0000000000000..a0c17c7584e80
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/locator-call-map.f90
@@ -0,0 +1,13 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: Function call locators are not supported yet
+
+subroutine f
+  interface
+    function p
+      integer, pointer :: p
+    end
+  end interface
+  !$omp target map(p())
+  !$omp end target
+end
diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-to.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-to.f90
new file mode 100644
index 0000000000000..e5747bdda82b1
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/locator-call-to.f90
@@ -0,0 +1,12 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: Function call locators are not supported yet
+
+subroutine f
+  interface
+    function p
+      integer, pointer :: p
+    end
+  end interface
+  !$omp target update to(p())
+end
diff --git a/flang/test/Lower/OpenMP/Todo/locator-reserved.f90 b/flang/test/Lower/OpenMP/Todo/locator-reserved.f90
new file mode 100644
index 0000000000000..e742ce843501b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/locator-reserved.f90
@@ -0,0 +1,11 @@
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: Reserved locators are not supported yet
+
+subroutine f
+  ! This is a wrong use of OMP_ALL_MEMORY, but at the moment the clauses that
+  ! legally allow this locator aren't accepting it yet in flang.
+  !$omp target map(omp_all_memory)
+  !$omp end target
+end
+
diff --git a/flang/test/Parser/OpenMP/allocate-align-tree.f90 b/flang/test/Parser/OpenMP/allocate-align-tree.f90
index e440d23904693..35f7d00b88a29 100644
--- a/flang/test/Parser/OpenMP/allocate-align-tree.f90
+++ b/flang/test/Parser/OpenMP/allocate-align-tree.f90
@@ -25,7 +25,7 @@ end program allocate_align_tree
 !CHECK:      ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | OmpBeginDirective
 !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'j'
+!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'j'
 !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Align -> OmpAlignClause -> Scalar -> Integer -> Constant -> Expr = '16_4'
 !CHECK-NEXT: | | | LiteralConstant -> IntLiteralConstant = '16'
 !CHECK-NEXT: | | Flags = {}
@@ -33,7 +33,7 @@ end program allocate_align_tree
 !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | OmpBeginDirective
 !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
+!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
 !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Align -> OmpAlignClause -> Scalar -> Integer -> Constant -> Expr = '32_4'
 !CHECK-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '32'
 !CHECK-NEXT: | | | | OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8'
diff --git a/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90 b/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90
index 92ddbbdce05c5..6624273659200 100644
--- a/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90
+++ b/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90
@@ -20,7 +20,7 @@ end program allocate_tree
 !CHECK:      | | DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | OmpBeginDirective
 !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f'
+!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f'
 !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8'
 !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc'
 !CHECK-NEXT: | | | | Flags = {}
@@ -34,7 +34,7 @@ end program allocate_tree
 !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | OmpBeginDirective
 !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'w'
+!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'w'
 !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '3_8'
 !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_const_mem_alloc'
 !CHECK-NEXT: | | | | Flags = {}
@@ -42,7 +42,7 @@ end program allocate_tree
 !CHECK-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | | | OmpBeginDirective
 !CHECK-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
+!CHECK-NEXT: | | | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
 !CHECK-NEXT: | | | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8'
 !CHECK-NEXT: | | | | | | | Designator -> DataRef -> Name = 'omp_large_cap_mem_alloc'
 !CHECK-NEXT: | | | | | | Flags = {}
@@ -50,7 +50,7 @@ end program allocate_tree
 !CHECK-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | | | | | OmpBeginDirective
 !CHECK-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'zarray'
+!CHECK-NEXT: | | | | | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'zarray'
 !CHECK-NEXT: | | | | | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8'
 !CHECK-NEXT: | | | | | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc'
 !CHECK-NEXT: | | | | | | | | Flags = {}
diff --git a/flang/test/Parser/OpenMP/allocate-tree.f90 b/flang/test/Parser/OpenMP/allocate-tree.f90
index 17ffb76aeed96..d2d309a646f01 100644
--- a/flang/test/Parser/OpenMP/allocate-tree.f90
+++ b/flang/test/Parser/OpenMP/allocate-tree.f90
@@ -21,7 +21,7 @@ end program allocate_tree
 !CHECK:      DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | OmpBeginDirective
 !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'w'
+!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'w'
 !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '3_8'
 !CHECK-NEXT: | | | Designator -> DataRef -> Name = 'omp_const_mem_alloc'
 !CHECK-NEXT: | | Flags = {}
@@ -30,7 +30,7 @@ end program allocate_tree
 !CHECK:      ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | OmpBeginDirective
 !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
+!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray'
 !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8'
 !CHECK-NEXT: | | | Designator -> DataRef -> Name = 'omp_large_cap_mem_alloc'
 !CHECK-NEXT: | | Flags = {}
@@ -38,7 +38,7 @@ end program allocate_tree
 !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective
 !CHECK-NEXT: | | | OmpBeginDirective
 !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'zarray'
+!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'zarray'
 !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8'
 !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc'
 !CHECK-NEXT: | | | | Flags = {}
diff --git a/flang/test/Parser/OpenMP/declare-variant.f90 b/flang/test/Parser/OpenMP/declare-variant.f90
index 07f65beaaf12a..cc967cf6642ee 100644
--- a/flang/test/Parser/OpenMP/declare-variant.f90
+++ b/flang/test/Parser/OpenMP/declare-variant.f90
@@ -38,7 +38,7 @@ subroutine sub (v1)
 
 !PARSE-TREE: OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector
 !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct
 !PARSE-TREE: | | OmpTraitSelector
@@ -68,7 +68,7 @@ subroutine sub (v1)
 
 !PARSE-TREE: OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector
 !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct
 !PARSE-TREE: | | OmpTraitSelector
@@ -96,7 +96,7 @@ subroutine sub (v1, v2)
 
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector
 !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct
 !PARSE-TREE: | | OmpTraitSelector
@@ -136,7 +136,7 @@ subroutine f2 (x, y)
 
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f1'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f1'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector
 !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct
 !PARSE-TREE: | | OmpTraitSelector
diff --git a/flang/test/Parser/OpenMP/depobj-construct.f90 b/flang/test/Parser/OpenMP/depobj-construct.f90
index 2d4831fe62bbb..9a49976cfd4b0 100644
--- a/flang/test/Parser/OpenMP/depobj-construct.f90
+++ b/flang/test/Parser/OpenMP/depobj-construct.f90
@@ -13,7 +13,7 @@ subroutine f00
 
 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Depend -> OmpDependClause -> TaskDep
 !PARSE-TREE: | | Modifier -> OmpTaskDependenceType -> OmpDependenceKind = In
 !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'y'
@@ -30,7 +30,7 @@ subroutine f01
 
 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Update -> OmpUpdateClause -> OmpTaskDependenceType -> OmpDependenceKind = Out
 
 subroutine f02
@@ -45,7 +45,7 @@ subroutine f02
 
 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Destroy -> OmpDestroyClause -> OmpObject -> Designator -> DataRef -> Name = 'x'
 
 subroutine f03
@@ -60,7 +60,7 @@ subroutine f03
 
 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Destroy ->
 
 subroutine f04
diff --git a/flang/test/Parser/OpenMP/groupprivate.f90 b/flang/test/Parser/OpenMP/groupprivate.f90
index 120af619d3b9b..82e16ba08d90f 100644
--- a/flang/test/Parser/OpenMP/groupprivate.f90
+++ b/flang/test/Parser/OpenMP/groupprivate.f90
@@ -19,12 +19,12 @@ module m
 
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpGroupprivateDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
-!PARSE-TREE: | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'y'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'y'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> DeviceType -> OmpDeviceTypeClause -> OmpDeviceType = Nohost
 !PARSE-TREE: | Flags = {}
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpGroupprivateDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'z'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'z'
 !PARSE-TREE: | OmpClauseList ->
 !PARSE-TREE: | Flags = {}
diff --git a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 b/flang/test/Parser/OpenMP/metadirective-dirspec.f90
index a24027161ef09..d63db6c93cf40 100644
--- a/flang/test/Parser/OpenMP/metadirective-dirspec.f90
+++ b/flang/test/Parser/OpenMP/metadirective-dirspec.f90
@@ -26,7 +26,7 @@ subroutine f00(x)
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = allocate
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | | | OmpClauseList ->
 
 subroutine f01(x)
@@ -52,7 +52,7 @@ subroutine f01(x)
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = critical
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | | | OmpClauseList ->
 
 subroutine f02
@@ -187,7 +187,7 @@ subroutine f04
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = declare simd
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f04'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f04'
 !PARSE-TREE: | | | OmpClauseList ->
 !PARSE-TREE: ImplicitPart ->
 
@@ -212,7 +212,7 @@ subroutine f05
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = declare target
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f05'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f05'
 !PARSE-TREE: | | | OmpClauseList ->
 !PARSE-TREE: ImplicitPart ->
 
@@ -239,8 +239,8 @@ subroutine f06(x, y)
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
-!PARSE-TREE: | | | | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'y'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'y'
 !PARSE-TREE: | | | OmpClauseList ->
 
 subroutine f07
@@ -266,5 +266,5 @@ subroutine f07
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = threadprivate
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 't'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 't'
 !PARSE-TREE: | | | OmpClauseList ->
diff --git a/flang/test/Parser/OpenMP/metadirective-flush.f90 b/flang/test/Parser/OpenMP/metadirective-flush.f90
index e4e521ed07073..6b74bfdf50bdb 100644
--- a/flang/test/Parser/OpenMP/metadirective-flush.f90
+++ b/flang/test/Parser/OpenMP/metadirective-flush.f90
@@ -23,7 +23,7 @@ subroutine f00()
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | | | OmpClauseList -> OmpClause -> SeqCst
 !PARSE-TREE: | | | Flags = {DeprecatedSyntax}
 
@@ -49,6 +49,6 @@ subroutine f01()
 !PARSE-TREE: | | | | | | | bool = 'true'
 !PARSE-TREE: | | OmpDirectiveSpecification
 !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush
-!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x'
 !PARSE-TREE: | | | OmpClauseList -> OmpClause -> SeqCst
 !PARSE-TREE: | | | Flags = {}
diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
index dbe51d854da04..5a14d99795752 100644
--- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
+++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
@@ -155,7 +155,7 @@ subroutine g05
 
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'g05'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'g05'
 !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector
 !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = User
 !PARSE-TREE: | | OmpTraitSelector
diff --git a/flang/test/Parser/OpenMP/threadprivate.f90 b/flang/test/Parser/OpenMP/threadprivate.f90
index e03bf6f7f94b9..5295acf6fa0a7 100644
--- a/flang/test/Parser/OpenMP/threadprivate.f90
+++ b/flang/test/Parser/OpenMP/threadprivate.f90
@@ -19,7 +19,7 @@ module m
 
 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpThreadprivateDirective -> OmpDirectiveSpecification
 !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = threadprivate
-!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Name = 'blk'
-!PARSE-TREE: | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'b'
+!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Name = 'blk'
+!PARSE-TREE: | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'b'
 !PARSE-TREE: | OmpClauseList ->
 !PARSE-TREE: | Flags = {}
diff --git a/flang/test/Semantics/OpenMP/affinity-invalid.f90 b/flang/test/Semantics/OpenMP/affinity-invalid.f90
index 32e726bada937..34bf9bb3c0798 100644
--- a/flang/test/Semantics/OpenMP/affinity-invalid.f90
+++ b/flang/test/Semantics/OpenMP/affinity-invalid.f90
@@ -81,14 +81,6 @@ subroutine affinity_iterator_section_bad_stride(n)
   !$omp end task
 end subroutine
 
-subroutine affinity_substring_like_single_index()
-  character(len=7) :: s
-  !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2.
-  !ERROR: Substrings must be in the form parent-string(lb:ub)
-  !$omp task affinity(s(2))
-  !$omp end task
-end subroutine
-
 subroutine affinity_substring_like_step()
   character(len=7) :: s
   !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2.
diff --git a/flang/test/Semantics/OpenMP/depend-substring.f90 b/flang/test/Semantics/OpenMP/depend-substring.f90
index 23d6bb4c0b7b3..558172fa218d2 100644
--- a/flang/test/Semantics/OpenMP/depend-substring.f90
+++ b/flang/test/Semantics/OpenMP/depend-substring.f90
@@ -25,15 +25,6 @@ subroutine substring_2(c)
   !$omp end task
 end
 
-! Error
-subroutine substring_3(c)
-  character(:), pointer :: c
-  !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2.
-  !ERROR: Substrings must be in the form parent-string(lb:ub)
-  !$omp task depend(out:c(2))
-  !$omp end task
-end
-
 ! This is okay: interpreted as indexing into the array not as a substring
 subroutine substring_3b(c)
   character(:), pointer :: c(:)
diff --git a/flang/test/Semantics/OpenMP/reserved-locator.f90 b/flang/test/Semantics/OpenMP/reserved-locator.f90
new file mode 100644
index 0000000000000..3fc45ffa0f54c
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/reserved-locator.f90
@@ -0,0 +1,6 @@
+!RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=60
+
+subroutine f
+!ERROR: 'OMP_SOME_MEMORY' is not a valid locator
+  !$omp target update from(omp_some_memory)
+end
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h
index 8ba5171caab25..1faec3812412c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h
@@ -103,6 +103,8 @@ LLVM_ABI ArrayRef<unsigned> getOpenMPVersions();
 /// of a variable in given OpenMP version?
 LLVM_ABI bool isPrivatizingConstruct(Directive D, unsigned Version);
 
+LLVM_ABI ArrayRef<StringRef> getReservedLocatorNames();
+
 /// Create a nicer version of a function name for humans to look at.
 LLVM_ABI std::string prettifyFunctionName(StringRef FunctionName);
 
diff --git a/llvm/lib/Frontend/OpenMP/OMP.cpp b/llvm/lib/Frontend/OpenMP/OMP.cpp
index 871b6211fc2a5..86e144624211c 100644
--- a/llvm/lib/Frontend/OpenMP/OMP.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMP.cpp
@@ -228,6 +228,12 @@ bool isPrivatizingConstruct(Directive D, unsigned Version) {
   return llvm::is_contained(Privatizing, D);
 }
 
+ArrayRef<StringRef> getReservedLocatorNames() {
+  // All names must be lowercase.
+  static StringRef names[]{"omp_all_memory"};
+  return names;
+}
+
 std::string prettifyFunctionName(StringRef FunctionName) {
   // Internalized functions have the right name, but simply a suffix.
   if (FunctionName.ends_with(".internalized"))

>From 822212abb78ea6961bd098282b2fed93188f244f Mon Sep 17 00:00:00 2001
From: Nathan Corbyn <n_corbyn at apple.com>
Date: Wed, 24 Jun 2026 15:02:00 +0100
Subject: [PATCH 39/42] [AArch64] Optimise materialisation of large stack
 offset calculations (#201856)

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 21 +++++++++++++++++--
 ...ramelayout-scavengingslot-stack-hazard.mir |  4 ++--
 .../AArch64/framelayout-scavengingslot.mir    |  4 ++--
 llvm/test/CodeGen/AArch64/irg_sp_tagp.ll      |  4 ++--
 .../AArch64/large-stack-offset-calcs.mir      | 15 +++++++++++++
 llvm/test/CodeGen/AArch64/stack-guard-sve.ll  | 10 ++++-----
 .../CodeGen/AArch64/swiftself-scavenger.ll    |  6 +++---
 7 files changed, 47 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d5d1e17216e63..4dbe292a29843 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7296,8 +7296,25 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   if (MinOff <= NewOffset && NewOffset <= MaxOff)
     Offset = Remainder;
   else {
-    NewOffset = NewOffset < 0 ? MinOff : MaxOff;
-    Offset = Offset - (NewOffset * Scale);
+    // Try to minimise the number of instructions required to materialise the
+    // offset calculation. Specifically, for fixed offsets, if masking out the
+    // low 12 bits leaves a legal add immediate, we can realise the offset
+    // calculation with a single add instruction. Whenever this is possible,
+    // prefer this split.
+    int64_t HighPart = Offset & ~0xFFF;
+    int64_t LowPart = Offset & 0xFFF;
+    int64_t LowScaled = LowPart / Scale;
+    if (!IsMulVL && NewOffset >= 0 && LowPart % Scale == 0 &&
+        MinOff <= LowScaled && LowScaled <= MaxOff &&
+        AArch64_AM::isLegalArithImmed(HighPart)) {
+      NewOffset = LowScaled;
+      Offset = HighPart;
+    } else {
+      // Default to a greedy split: take the memop immediate to be maximum /
+      // minimum expressible offset and materialise the remainder.
+      NewOffset = NewOffset < 0 ? MinOff : MaxOff;
+      Offset = Offset - (NewOffset * Scale);
+    }
   }
 
   if (EmittableOffset)
diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
index 52ac36f801854..5a75f540c60f6 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir
@@ -77,8 +77,8 @@ name: stack_hazard_streaming_compat_emergency_spill_slot
 # CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot
 # CHECK: bb.0:
 # CHECK:      STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0
-# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0
-# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095
+# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 8, 12
+# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 131
 # CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0
 # CHECK: bb.1:
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
index 390582969d026..17ae97ba58077 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir
@@ -6,8 +6,8 @@ name: LateScavengingSlotRealignment
 # CHECK-LABEL: name: LateScavengingSlotRealignment
 # CHECK: bb.0:
 # CHECK:      STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0
-# CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 40, 0
-# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4095
+# CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 8, 12
+# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4
 # CHECK-NEXT: $[[SCRATCH]] = LDRXui $sp, 0
 # CHECK: bb.1:
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
index 4fa96c771a330..b6d32c59a1f19 100644
--- a/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
+++ b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll
@@ -37,8 +37,8 @@ define dso_local void @huge_allocas() {
 entry:
 ; CHECK-LABEL: huge_allocas:
 ; CHECK:      irg  x1, sp{{$}}
-; CHECK:      add  [[TMP:x[0-9]+]], x1, #3088
-; CHECK:      addg x0, [[TMP]], #1008, #1
+; CHECK:      add  [[TMP:x[0-9]+]], x1, #1, lsl #12
+; CHECK:      addg x0, [[TMP]], #0, #1
 ; CHECK:      bl use2
   %a = alloca i8, i64 4096, align 16
   %b = alloca i8, i64 4096, align 16
diff --git a/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir b/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir
new file mode 100644
index 0000000000000..bae954a7ad3d8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir
@@ -0,0 +1,15 @@
+# RUN: llc -mtriple=arm64-apple-ios -run-pass=prologepilog %s -o - | FileCheck %s
+---
+name: large_stack_offset_calc
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 32768, alignment: 8 }
+body: |
+  ; CHECK-LABEL: name: large_stack_offset_calc
+  ; CHECK:      $[[BASE:x[0-9]+]] = ADDXri $sp, 10, 12
+  ; CHECK-NEXT: STRXui $x0, killed $[[BASE]], 317
+  bb.0:
+    liveins: $x0
+    STRXui $x0, %stack.0, 5437 :: (store (s64))
+    RET_ReallyLR
+...
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
index e719e6d9d25b2..644ddc9ee4bc2 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll
@@ -151,10 +151,9 @@ entry:
 ; CHECK: addvl sp, sp, #-2
 
 ; Stack guard is placed below the SVE stack area (and above all fixed-width objects)
-; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12
-; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC]], [[STACK_GUARD_SPILL_PART_LOC]], #16
+; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #16, lsl #12
 ; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard]
-; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_SPILL_PART_LOC]], #32760]
+; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_SPILL_PART_LOC]], #8]
 
 ; char_arr is below the stack guard
 ; CHECK-DAG: add [[CHAR_ARR_LOC:x[0-9]+]], sp, #16, lsl #12
@@ -206,9 +205,8 @@ entry:
 ; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_POS]]]
 
 ; char_arr is below the SVE stack area
-; CHECK-DAG: add [[CHAR_ARR:x[0-9]+]], sp, #15, lsl #12            // =61440
-; CHECK-DAG: add [[CHAR_ARR]], [[CHAR_ARR]], #9
-; CHECK-DAG: strb wzr, [[[CHAR_ARR]], #4095]
+; CHECK-DAG: add [[CHAR_ARR:x[0-9]+]], sp, #16, lsl #12            // =65536
+; CHECK-DAG: strb wzr, [[[CHAR_ARR]], #8]
 
 ; large1 is accessed via a virtual base register
 ; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12
diff --git a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll
index 030593986be4a..a19cc12150924 100644
--- a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll
+++ b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll
@@ -3,8 +3,8 @@
 ; CSR spill for the values used by the swiftself parameter.
 ; CHECK-LABEL: func:
 ; CHECK: str [[REG:x[0-9]+]], [sp]
-; CHECK: add [[REG]], sp, #248
-; CHECK: str xzr, [{{\s*}}[[REG]], #32760]
+; CHECK: add [[REG]], sp, #8, lsl #12
+; CHECK: str xzr, [{{\s*}}[[REG]], #240]
 ; CHECK: ldr [[REG]], [sp]
 target triple = "arm64-apple-ios"
 
@@ -75,7 +75,7 @@ bb:
   store volatile i64 %v23, ptr @ptr64, align 8
   store volatile i64 %v24, ptr @ptr64, align 8
   store volatile i64 %v25, ptr @ptr64, align 8
-  
+
   ; use swiftself parameter late so it stays alive throughout the function.
   store volatile ptr %arg, ptr @ptr8
   ret void

>From 2ae9c99a738e78b8713e671b655ea3930439dd91 Mon Sep 17 00:00:00 2001
From: Lukas Sommer <lukas.sommer at amd.com>
Date: Wed, 24 Jun 2026 16:07:47 +0200
Subject: [PATCH 40/42] [AMDGPU] Promote uniform i16 ABS to i32 (#204526)

GlobalISel already expands uniform `i16` `G_ABS` into a sign-extend to `i32`
followed by the native `s_abs` instruction.

This adds a similar expansion as a DAGCombiner pattern, promoting uniform
`i16` `ABS` to `i32` so that it can use `s_abs`.

---------

Signed-off-by: Lukas Sommer <lukas.sommer at amd.com>
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  1 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 26 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  1 +
 llvm/test/CodeGen/AMDGPU/absdiff.ll           |  4 +--
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index be6fd4d243252..795e487219d8f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1041,6 +1041,7 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
 bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
                                                  EVT DestVT) const {
   switch (N->getOpcode()) {
+  case ISD::ABS:
   case ISD::ADD:
   case ISD::SUB:
   case ISD::SHL:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 03d7b936d4109..502cc438c836c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1052,6 +1052,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                        ISD::FMINIMUMNUM,
                        ISD::FMAXIMUMNUM,
                        ISD::FMA,
+                       ISD::ABS,
                        ISD::SMIN,
                        ISD::SMAX,
                        ISD::UMIN,
@@ -8795,6 +8796,7 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
 
 static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
   switch (Op->getOpcode()) {
+  case ISD::ABS:
   case ISD::SRA:
   case ISD::SMIN:
   case ISD::SMAX:
@@ -8823,6 +8825,26 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
   }
 }
 
+SDValue
+SITargetLowering::promoteUniformUnaryOpToI32(SDValue Op,
+                                             DAGCombinerInfo &DCI) const {
+  EVT OpTy = Op.getValueType();
+  SelectionDAG &DAG = DCI.DAG;
+  EVT ExtTy = OpTy.changeElementType(*DAG.getContext(), MVT::i32);
+
+  if (isNarrowingProfitable(Op.getNode(), ExtTy, OpTy))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Input = Op.getOperand(0);
+  const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
+  Input = DAG.getNode(ExtOp, DL, ExtTy, Input);
+
+  SDValue NewVal = DAG.getNode(Op.getOpcode(), DL, ExtTy, Input);
+
+  return DAG.getNode(ISD::TRUNCATE, DL, OpTy, NewVal);
+}
+
 SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
                                                 DAGCombinerInfo &DCI) const {
   const unsigned Opc = Op.getOpcode();
@@ -18543,6 +18565,10 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N,
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
+  case ISD::ABS:
+    if (SDValue Res = promoteUniformUnaryOpToI32(SDValue(N, 0), DCI))
+      return Res;
+    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::SHL:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 3d72723c9ca8f..c98426cdac0b1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -168,6 +168,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
   SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
+  SDValue promoteUniformUnaryOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
   SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AMDGPU/absdiff.ll b/llvm/test/CodeGen/AMDGPU/absdiff.ll
index 5e4947ea5e0b5..94385621f6b15 100644
--- a/llvm/test/CodeGen/AMDGPU/absdiff.ll
+++ b/llvm/test/CodeGen/AMDGPU/absdiff.ll
@@ -43,10 +43,8 @@ define amdgpu_ps i16 @absdiff_i16_false(i16 inreg %arg0, i16 inreg %arg1) {
 ; CHECK-LABEL: absdiff_i16_false:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_sub_i32 s0, s0, s1
-; CHECK-NEXT:    s_sext_i32_i16 s1, s0
-; CHECK-NEXT:    s_sub_i32 s0, 0, s0
 ; CHECK-NEXT:    s_sext_i32_i16 s0, s0
-; CHECK-NEXT:    s_max_i32 s0, s1, s0
+; CHECK-NEXT:    s_abs_i32 s0, s0
 ; CHECK-NEXT:    ; return to shader part epilog
   %diff = sub i16 %arg0, %arg1
   %res = call i16 @llvm.abs.i16(i16 %diff, i1 false) ; INT_MIN input returns INT_MIN

>From 3abbb6a92de7049503c1597550b319e1d4aa412e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 24 Jun 2026 15:11:22 +0100
Subject: [PATCH 41/42] [X86] isHorizontalBinOp - constify hadd/sub pattern
 matcher to make it easier to reuse. (#205538)

Avoid performing vector splits / bitcasts inside the isHorizontalBinOp call.

This is to make it easier to reuse the pattern matcher for other uses
(e.g. X86ISD::VPMADDWD/VPMADDUBSW detection).
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d3729b4102c55..8d87cf8a6665e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55266,8 +55266,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
 /// A horizontal-op B, for some already available A and B, and if so then LHS is
 /// set to A, RHS to B, and the routine returns 'true'.
 static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
-                              SelectionDAG &DAG, const X86Subtarget &Subtarget,
-                              bool IsCommutative,
+                              const SelectionDAG &DAG,
+                              const X86Subtarget &Subtarget, bool IsCommutative,
                               SmallVectorImpl<int> &PostShuffleMask,
                               bool ForceHorizOp) {
   // If either operand is undef, bail out. The binop should be simplified.
@@ -55312,8 +55312,11 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
         ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end());
       }
       if (UseSubVector && SrcOps.size() == 1 &&
-          scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) {
-        std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op));
+          scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask) &&
+          SrcOps[0].getOpcode() == ISD::CONCAT_VECTORS &&
+          SrcOps[0].getNumOperands() == 2) {
+        N0 = SrcOps[0].getOperand(0);
+        N1 = SrcOps[0].getOperand(1);
         ArrayRef<int> Mask = ArrayRef<int>(ScaledMask).slice(0, NumElts);
         ShuffleMask.assign(Mask.begin(), Mask.end());
       }
@@ -55449,8 +55452,8 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
                              DAG, Subtarget))
     return false;
 
-  LHS = DAG.getBitcast(VT, NewLHS);
-  RHS = DAG.getBitcast(VT, NewRHS);
+  LHS = NewLHS;
+  RHS = NewRHS;
   return true;
 }
 
@@ -55481,7 +55484,9 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
       auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB;
       if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
                             PostShuffleMask, MergableHorizOp(HorizOpcode))) {
-        SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
+        SDValue HorizBinOp =
+            DAG.getNode(HorizOpcode, SDLoc(N), VT, DAG.getBitcast(VT, LHS),
+                        DAG.getBitcast(VT, RHS));
         if (!PostShuffleMask.empty())
           HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                             DAG.getUNDEF(VT), PostShuffleMask);
@@ -55497,7 +55502,6 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
       break;
     if (VT == MVT::v8i16 || VT == MVT::v16i16 ||
         (!IsSat && (VT == MVT::v4i32 || VT == MVT::v8i32))) {
-
       SDValue LHS = N->getOperand(0);
       SDValue RHS = N->getOperand(1);
       auto HorizOpcode = IsSat ? (IsAdd ? X86ISD::HADDS : X86ISD::HSUBS)
@@ -55508,8 +55512,9 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
                                         ArrayRef<SDValue> Ops) {
           return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
         };
-        SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
-                                              {LHS, RHS}, HOpBuilder);
+        SDValue HorizBinOp = SplitOpsAndApply(
+            DAG, Subtarget, SDLoc(N), VT,
+            {DAG.getBitcast(VT, LHS), DAG.getBitcast(VT, RHS)}, HOpBuilder);
         if (!PostShuffleMask.empty())
           HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                             DAG.getUNDEF(VT), PostShuffleMask);

>From 2f0b11ceb4aeec9522fcba83fe939b837a43b43e Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 24 Jun 2026 13:03:13 +0000
Subject: [PATCH 42/42] [LoopInterchange] Remove some early exits in transform
 phase (NFCI)

---
 llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 1c07b25d86fb6..e9e77b7a48963 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -2115,10 +2115,8 @@ bool LoopInterchangeTransform::transform(
 
   LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
   auto &InductionPHIs = LIL.getInnerLoopInductions();
-  if (InductionPHIs.empty()) {
-    LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
-    return false;
-  }
+  assert(!InductionPHIs.empty() &&
+         "Expected at least one induction variable in the inner loop");
 
   SmallVector<Instruction *, 8> InnerIndexVarList;
   for (PHINode *CurInductionPHI : InductionPHIs) {
@@ -2479,17 +2477,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   Instruction *OuterLoopHeaderBI = OuterLoopHeader->getTerminator();
   Instruction *InnerLoopHeaderBI = InnerLoopHeader->getTerminator();
 
-  if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
-      !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
-      !InnerLoopHeaderBI)
-    return false;
+  assert(OuterLoopPredecessor && InnerLoopLatchPredecessor &&
+         "Failed to find a unique predecessor");
+  assert(OuterLoopLatchBI && InnerLoopLatchBI &&
+         "Failed to find a conditional branch");
 
   Instruction *InnerLoopLatchPredecessorBI =
       InnerLoopLatchPredecessor->getTerminator();
   Instruction *OuterLoopPredecessorBI = OuterLoopPredecessor->getTerminator();
 
-  if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
-    return false;
   BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
   if (!InnerLoopHeaderSuccessor)
     return false;