[llvm-branch-commits] [clang] [flang] [libc] [libcxx] [lld] [lldb] [llvm] Propagate DebugLocs on phis in BreakCriticalEdges (PR #133492)
Orlando Cazalet-Hyams via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 8 04:52:02 PDT 2025
Gaëtan Bossu <gaetan.bossu at arm.com>,Fangrui Song
<i at maskray.me>,Stanislav Mekhanoshin <rampitec at users.noreply.github.com>,haonan
<haonan.yang at intel.com>,Pierre van Houtryve <pierre.vanhoutryve at amd.com>,David
Spickett <david.spickett at linaro.org>,Tom Eccles <tom.eccles at arm.com>,Luke
Hutton <luke.hutton at arm.com>,Orlando Cazalet-Hyams <orlando.hyams at sony.com>,Orlando
Cazalet-Hyams <orlando.hyams at sony.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/133492 at github.com>
https://github.com/OCHyams updated https://github.com/llvm/llvm-project/pull/133492
>From 55a88cdf53948e7460d9c6892f6c481480faa021 Mon Sep 17 00:00:00 2001
From: Mallikarjuna Gouda <mgouda at mips.com>
Date: Wed, 7 May 2025 21:42:06 +0530
Subject: [PATCH 001/115] [MIPS] Add FeatureMSA to i6400 and i6500 cores
(#134985)
- Enable 'FeatureMSA' for the MIPS i6400 and i6500 CPUs.
- Enable the -mmsa option if mcpu is set to either i6400 or i6500.
- Add a clang driver test to validate the msa feature.
- Add an llvm codegen test to validate msa instructions for the i6400 and
i6500 CPUs.
The MIPS i6400 and i6500 cores implement and enable MSA (MIPS SIMD
Architecture) by default.
---
clang/lib/Driver/ToolChains/Arch/Mips.cpp | 6 ++++++
clang/test/Driver/mips-cpus.c | 9 +++++++++
llvm/lib/Target/Mips/Mips.td | 4 ++--
llvm/test/CodeGen/Mips/msa/arithmetic.ll | 2 ++
4 files changed, 19 insertions(+), 2 deletions(-)
create mode 100644 clang/test/Driver/mips-cpus.c
diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index 9c817f238524c..960ee7fd179e1 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -255,6 +255,12 @@ void mips::getMIPSTargetFeatures(const Driver &D, const llvm::Triple &Triple,
D.Diag(diag::err_drv_unsupported_noabicalls_pic);
}
+ if (CPUName == "i6500" || CPUName == "i6400") {
+ // MIPS cpu i6400 and i6500 support MSA (Mips SIMD Architecture)
+ // by default.
+ Features.push_back("+msa");
+ }
+
if (!UseAbiCalls)
Features.push_back("+noabicalls");
else
diff --git a/clang/test/Driver/mips-cpus.c b/clang/test/Driver/mips-cpus.c
new file mode 100644
index 0000000000000..2e988e58f04fb
--- /dev/null
+++ b/clang/test/Driver/mips-cpus.c
@@ -0,0 +1,9 @@
+// Check target CPUs are correctly passed.
+
+// RUN: %clang --target=mips64 -### -c %s 2>&1 -mcpu=i6400 | FileCheck -check-prefix=MCPU-I6400 %s
+// MCPU-I6400: "-target-cpu" "i6400"
+// MCPU-I6400: "-target-feature" "+msa" "-target-feature" "-noabicalls"
+
+// RUN: %clang --target=mips64 -### -c %s 2>&1 -mcpu=i6500 | FileCheck -check-prefix=MCPU-I6500 %s
+// MCPU-I6500: "-target-cpu" "i6500"
+// MCPU-I6500: "-target-feature" "+msa" "-target-feature" "-noabicalls"
diff --git a/llvm/lib/Target/Mips/Mips.td b/llvm/lib/Target/Mips/Mips.td
index 99415bcdbc20b..b346ba95f5984 100644
--- a/llvm/lib/Target/Mips/Mips.td
+++ b/llvm/lib/Target/Mips/Mips.td
@@ -243,11 +243,11 @@ def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl",
// same CPU architecture.
def ImplI6400
: SubtargetFeature<"i6400", "ProcImpl", "MipsSubtarget::CPU::I6400",
- "MIPS I6400 Processor", [FeatureMips64r6]>;
+ "MIPS I6400 Processor", [FeatureMips64r6, FeatureMSA]>;
def ImplI6500
: SubtargetFeature<"i6500", "ProcImpl", "MipsSubtarget::CPU::I6500",
- "MIPS I6500 Processor", [FeatureMips64r6]>;
+ "MIPS I6500 Processor", [FeatureMips64r6, FeatureMSA]>;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, MipsGenericModel, Features>;
diff --git a/llvm/test/CodeGen/Mips/msa/arithmetic.ll b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
index a262ce183d74e..ad0493b694d48 100644
--- a/llvm/test/CodeGen/Mips/msa/arithmetic.ll
+++ b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=ALL,MIPS
; RUN: llc -mtriple=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s --check-prefixes=ALL,MIPSEL
+; RUN: llc -mtriple=mips64 -mcpu=i6500 < %s | FileCheck %s --check-prefixes=ALL
+; RUN: llc -mtriple=mips64 -mcpu=i6400 < %s | FileCheck %s --check-prefixes=ALL
define void @add_v16i8(ptr %c, ptr %a, ptr %b) nounwind {
; ALL-LABEL: add_v16i8:
>From 8d9f5160b443f431f20f8f88183898503b62a173 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akaylor at nvidia.com>
Date: Wed, 7 May 2025 09:21:01 -0700
Subject: [PATCH 002/115] [CIR] Unblock simple C++ structure support (#138368)
This change adds additional checks to a few places where a simple struct
in C++ code was triggering `errorNYI` even though no extra handling was
needed, and adds a very small amount of trivial initialization. The code
now checks for the conditions that do require extra handling before
issuing the diagnostic.
New tests are added for declaring and using a simple struct in C++ code.
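To illustrate the distinction in plain C++ (this simply restates the added
tests; the Itanium ABI note is background context, not part of the patch):

// Now handled: trivial and zero-initializable, so it can be emitted as a
// zero-initialized global instead of hitting errorNYI.
struct CompleteS {
  int a;
  char b;
};
CompleteS cs;

// Still NYI: the default constructor is trivial, but a null pointer-to-data-
// member is all-ones under the Itanium ABI, so the global cannot be emitted
// as a plain zeroinitializer.
struct Other { int x; };
struct Trivial {
  int x;
  double y;
  decltype(&Other::x) ptr;
};
Trivial t;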
---
clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 9 ++-
clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 32 ++++++++--
clang/lib/CIR/CodeGen/CIRGenRecordLayout.h | 26 +++++++-
.../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 45 +++++++++++---
clang/lib/CIR/CodeGen/CIRGenTypes.cpp | 17 ++++--
clang/lib/CIR/CodeGen/CIRGenTypes.h | 1 +
clang/test/CIR/CodeGen/nonzeroinit-struct.cpp | 19 ++++++
clang/test/CIR/CodeGen/struct.cpp | 37 ++++++++++++
clang/test/CIR/CodeGen/union.cpp | 59 +++++++++++++++++++
9 files changed, 222 insertions(+), 23 deletions(-)
create mode 100644 clang/test/CIR/CodeGen/nonzeroinit-struct.cpp
create mode 100644 clang/test/CIR/CodeGen/union.cpp
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 94a6c03f7f1a4..64cbda2ebe0af 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -322,9 +322,12 @@ LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) {
assert(!cir::MissingFeatures::opTBAA());
Address addr = base.getAddress();
- if (isa<CXXRecordDecl>(rec)) {
- cgm.errorNYI(field->getSourceRange(), "emitLValueForField: C++ class");
- return LValue();
+ if (auto *classDecl = dyn_cast<CXXRecordDecl>(rec)) {
+ if (cgm.getCodeGenOpts().StrictVTablePointers &&
+ classDecl->isDynamicClass()) {
+ cgm.errorNYI(field->getSourceRange(),
+ "emitLValueForField: strict vtable for dynamic class");
+ }
}
unsigned recordCVR = base.getVRQualifiers();
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
index 2d8550fad454c..9085ee2dfe506 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp
@@ -14,6 +14,7 @@
#include "CIRGenConstantEmitter.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
+#include "CIRGenRecordLayout.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
@@ -365,12 +366,33 @@ mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &d) {
// initialization of memory to all NULLs.
if (!d.hasLocalStorage()) {
QualType ty = cgm.getASTContext().getBaseElementType(d.getType());
- if (ty->isRecordType())
- if (d.getInit() && isa<CXXConstructExpr>(d.getInit())) {
- cgm.errorNYI(d.getInit()->getBeginLoc(),
- "tryEmitPrivateForVarInit CXXConstructExpr");
- return {};
+ if (ty->isRecordType()) {
+ if (const auto *e = dyn_cast_or_null<CXXConstructExpr>(d.getInit())) {
+ const CXXConstructorDecl *cd = e->getConstructor();
+ // FIXME: we should probably model this more closely to C++ than
+ // just emitting a global with zero init (mimic what we do for trivial
+ // assignments and whatnots). Since this is for globals shouldn't
+ // be a problem for the near future.
+ if (cd->isTrivial() && cd->isDefaultConstructor()) {
+ const auto *cxxrd =
+ cast<CXXRecordDecl>(ty->getAs<RecordType>()->getDecl());
+ if (cxxrd->getNumBases() != 0) {
+ // There may not be anything additional to do here, but this will
+ // force us to pause and test this path when it is supported.
+ cgm.errorNYI("tryEmitPrivateForVarInit: cxx record with bases");
+ return {};
+ }
+ if (!cgm.getTypes().isZeroInitializable(cxxrd)) {
+ // To handle this case, we really need to go through
+ // emitNullConstant, but we need an attribute, not a value
+ cgm.errorNYI(
+ "tryEmitPrivateForVarInit: non-zero-initializable cxx record");
+ return {};
+ }
+ return cir::ZeroAttr::get(cgm.convertType(d.getType()));
+ }
}
+ }
}
inConstantContext = d.hasConstantInitialization();
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
index 11768b042e87e..2ece85b8aa0a3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayout.h
@@ -33,9 +33,23 @@ class CIRGenRecordLayout {
/// field no. This info is populated by the record builder.
llvm::DenseMap<const clang::FieldDecl *, unsigned> fieldIdxMap;
+ /// False if any direct or indirect subobject of this class, when considered
+ /// as a complete object, requires a non-zero bitpattern when
+ /// zero-initialized.
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned zeroInitializable : 1;
+
+ /// False if any direct or indirect subobject of this class, when considered
+ /// as a base subobject, requires a non-zero bitpattern when zero-initialized.
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned zeroInitializableAsBase : 1;
+
public:
- CIRGenRecordLayout(cir::RecordType completeObjectType)
- : completeObjectType(completeObjectType) {}
+ CIRGenRecordLayout(cir::RecordType completeObjectType, bool zeroInitializable,
+ bool zeroInitializableAsBase)
+ : completeObjectType(completeObjectType),
+ zeroInitializable(zeroInitializable),
+ zeroInitializableAsBase(zeroInitializableAsBase) {}
/// Return the "complete object" LLVM type associated with
/// this record.
@@ -47,6 +61,14 @@ class CIRGenRecordLayout {
assert(fieldIdxMap.count(fd) && "Invalid field for record!");
return fieldIdxMap.lookup(fd);
}
+
+ /// Check whether this struct can be C++ zero-initialized
+ /// with a zeroinitializer.
+ bool isZeroInitializable() const { return zeroInitializable; }
+
+ /// Check whether this struct can be C++ zero-initialized
+ /// with a zeroinitializer when considered as a base subobject.
+ bool isZeroInitializableAsBase() const { return zeroInitializableAsBase; }
};
} // namespace clang::CIRGen
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 5bcd408b4072a..53aa0aee36fc3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -77,6 +77,8 @@ struct CIRRecordLowering final {
return astContext.toCharUnitsFromBits(bitOffset);
}
+ void calculateZeroInit();
+
CharUnits getSize(mlir::Type Ty) {
return CharUnits::fromQuantity(dataLayout.layout.getTypeSize(Ty));
}
@@ -177,18 +179,26 @@ void CIRRecordLowering::lower() {
return;
}
- if (isa<CXXRecordDecl>(recordDecl)) {
- cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(),
- "lower: class");
- return;
- }
-
assert(!cir::MissingFeatures::cxxSupport());
CharUnits size = astRecordLayout.getSize();
accumulateFields();
+ if (const auto *cxxRecordDecl = dyn_cast<CXXRecordDecl>(recordDecl)) {
+ if (cxxRecordDecl->getNumBases() > 0) {
+ CIRGenModule &cgm = cirGenTypes.getCGModule();
+ cgm.errorNYI(recordDecl->getSourceRange(),
+ "CIRRecordLowering::lower: derived CXXRecordDecl");
+ return;
+ }
+ if (members.empty()) {
+ appendPaddingBytes(size);
+ assert(!cir::MissingFeatures::bitfields());
+ return;
+ }
+ }
+
llvm::stable_sort(members);
// TODO: implement clipTailPadding once bitfields are implemented
assert(!cir::MissingFeatures::bitfields());
@@ -199,6 +209,7 @@ void CIRRecordLowering::lower() {
insertPadding();
members.pop_back();
+ calculateZeroInit();
fillOutputFields();
}
@@ -236,6 +247,19 @@ void CIRRecordLowering::accumulateFields() {
}
}
+void CIRRecordLowering::calculateZeroInit() {
+ for (const MemberInfo &member : members) {
+ if (member.kind == MemberInfo::InfoKind::Field) {
+ if (!member.fieldDecl || isZeroInitializable(member.fieldDecl))
+ continue;
+ zeroInitializable = zeroInitializableAsBase = false;
+ return;
+ }
+ // TODO(cir): handle base types
+ assert(!cir::MissingFeatures::cxxSupport());
+ }
+}
+
void CIRRecordLowering::determinePacked() {
if (packed)
return;
@@ -295,7 +319,10 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) {
// If we're in C++, compute the base subobject type.
if (llvm::isa<CXXRecordDecl>(rd) && !rd->isUnion() &&
!rd->hasAttr<FinalAttr>()) {
- cgm.errorNYI(rd->getSourceRange(), "computeRecordLayout: CXXRecordDecl");
+ if (lowering.astRecordLayout.getNonVirtualSize() !=
+ lowering.astRecordLayout.getSize()) {
+ cgm.errorNYI(rd->getSourceRange(), "computeRecordLayout: CXXRecordDecl");
+ }
}
// Fill in the record *after* computing the base type. Filling in the body
@@ -304,7 +331,9 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) {
assert(!cir::MissingFeatures::astRecordDeclAttr());
ty->complete(lowering.fieldTypes, lowering.packed, lowering.padded);
- auto rl = std::make_unique<CIRGenRecordLayout>(ty ? *ty : cir::RecordType());
+ auto rl = std::make_unique<CIRGenRecordLayout>(
+ ty ? *ty : cir::RecordType(), (bool)lowering.zeroInitializable,
+ (bool)lowering.zeroInitializableAsBase);
assert(!cir::MissingFeatures::recordZeroInit());
assert(!cir::MissingFeatures::cxxSupport());
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index e85f2f4aa0978..89dc5eea7f028 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -237,8 +237,11 @@ mlir::Type CIRGenTypes::convertRecordDeclType(const clang::RecordDecl *rd) {
assert(insertResult && "isSafeToCovert() should have caught this.");
// Force conversion of non-virtual base classes recursively.
- if (isa<CXXRecordDecl>(rd)) {
- cgm.errorNYI(rd->getSourceRange(), "CXXRecordDecl");
+ if (const auto *cxxRecordDecl = dyn_cast<CXXRecordDecl>(rd)) {
+ if (cxxRecordDecl->getNumBases() > 0) {
+ cgm.errorNYI(rd->getSourceRange(),
+ "convertRecordDeclType: derived CXXRecordDecl");
+ }
}
// Layout fields.
@@ -497,9 +500,9 @@ bool CIRGenTypes::isZeroInitializable(clang::QualType t) {
return true;
}
- if (t->getAs<RecordType>()) {
- cgm.errorNYI(SourceLocation(), "isZeroInitializable for RecordType", t);
- return false;
+ if (const RecordType *rt = t->getAs<RecordType>()) {
+ const RecordDecl *rd = rt->getDecl();
+ return isZeroInitializable(rd);
}
if (t->getAs<MemberPointerType>()) {
@@ -511,6 +514,10 @@ bool CIRGenTypes::isZeroInitializable(clang::QualType t) {
return true;
}
+bool CIRGenTypes::isZeroInitializable(const RecordDecl *rd) {
+ return getCIRGenRecordLayout(rd).isZeroInitializable();
+}
+
const CIRGenFunctionInfo &CIRGenTypes::arrangeCIRFunctionInfo(
CanQualType returnType, llvm::ArrayRef<clang::CanQualType> argTypes) {
assert(llvm::all_of(argTypes,
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 38f4b389c8db9..cf94375d17e12 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -120,6 +120,7 @@ class CIRGenTypes {
/// Return whether a type can be zero-initialized (in the C++ sense) with an
/// LLVM zeroinitializer.
bool isZeroInitializable(clang::QualType ty);
+ bool isZeroInitializable(const RecordDecl *rd);
const CIRGenFunctionInfo &arrangeFreeFunctionCall(const CallArgList &args,
const FunctionType *fnType);
diff --git a/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp b/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp
new file mode 100644
index 0000000000000..76832d1805030
--- /dev/null
+++ b/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp
@@ -0,0 +1,19 @@
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s
+
+struct Other {
+ int x;
+};
+
+struct Trivial {
+ int x;
+ double y;
+ decltype(&Other::x) ptr;
+};
+
+// This case has a trivial default constructor, but can't be zero-initialized.
+Trivial t;
+
+// Since the case above isn't handled yet, we want a test that verifies that
+// we're failing for the right reason.
+
+// CHECK: error: ClangIR code gen Not Yet Implemented: tryEmitPrivateForVarInit: non-zero-initializable cxx record
diff --git a/clang/test/CIR/CodeGen/struct.cpp b/clang/test/CIR/CodeGen/struct.cpp
index 0d939ddd0b338..208d8f184475c 100644
--- a/clang/test/CIR/CodeGen/struct.cpp
+++ b/clang/test/CIR/CodeGen/struct.cpp
@@ -12,6 +12,17 @@ IncompleteS *p;
// LLVM: @p = dso_local global ptr null
// OGCG: @p = global ptr null, align 8
+struct CompleteS {
+ int a;
+ char b;
+};
+
+CompleteS cs;
+
+// CIR: cir.global external @cs = #cir.zero : !rec_CompleteS
+// LLVM-DAG: @cs = dso_local global %struct.CompleteS zeroinitializer
+// OGCG-DAG: @cs = global %struct.CompleteS zeroinitializer, align 4
+
void f(void) {
IncompleteS *p;
}
@@ -28,3 +39,29 @@ void f(void) {
// OGCG-NEXT: entry:
// OGCG-NEXT: %[[P:.*]] = alloca ptr, align 8
// OGCG-NEXT: ret void
+
+char f2(CompleteS &s) {
+ return s.b;
+}
+
+// CIR: cir.func @_Z2f2R9CompleteS(%[[ARG_S:.*]]: !cir.ptr<!rec_CompleteS>{{.*}})
+// CIR: %[[S_ADDR:.*]] = cir.alloca !cir.ptr<!rec_CompleteS>, !cir.ptr<!cir.ptr<!rec_CompleteS>>, ["s", init, const]
+// CIR: cir.store %[[ARG_S]], %[[S_ADDR]]
+// CIR: %[[S_REF:.*]] = cir.load %[[S_ADDR]]
+// CIR: %[[S_ADDR2:.*]] = cir.get_member %[[S_REF]][1] {name = "b"}
+// CIR: %[[S_B:.*]] = cir.load %[[S_ADDR2]]
+
+// LLVM: define i8 @_Z2f2R9CompleteS(ptr %[[ARG_S:.*]])
+// LLVM: %[[S_ADDR:.*]] = alloca ptr
+// LLVM: store ptr %[[ARG_S]], ptr %[[S_ADDR]]
+// LLVM: %[[S_REF:.*]] = load ptr, ptr %[[S_ADDR]], align 8
+// LLVM: %[[S_ADDR2:.*]] = getelementptr %struct.CompleteS, ptr %[[S_REF]], i32 0, i32 1
+// LLVM: %[[S_B:.*]] = load i8, ptr %[[S_ADDR2]]
+
+// OGCG: define{{.*}} i8 @_Z2f2R9CompleteS(ptr{{.*}} %[[ARG_S:.*]])
+// OGCG: entry:
+// OGCG: %[[S_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[ARG_S]], ptr %[[S_ADDR]]
+// OGCG: %[[S_REF:.*]] = load ptr, ptr %[[S_ADDR]]
+// OGCG: %[[S_ADDR2:.*]] = getelementptr inbounds nuw %struct.CompleteS, ptr %[[S_REF]], i32 0, i32 1
+// OGCG: %[[S_B:.*]] = load i8, ptr %[[S_ADDR2]]
diff --git a/clang/test/CIR/CodeGen/union.cpp b/clang/test/CIR/CodeGen/union.cpp
new file mode 100644
index 0000000000000..24cd93f6b8edb
--- /dev/null
+++ b/clang/test/CIR/CodeGen/union.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// Should generate a union type with all members preserved.
+union U {
+ bool b;
+ short s;
+ int i;
+ float f;
+ double d;
+};
+// CIR: !rec_U = !cir.record<union "U" {!cir.bool, !s16i, !s32i, !cir.float, !cir.double}>
+// LLVM: %union.U = type { double }
+// OGCG: %union.U = type { double }
+
+void shouldGenerateUnionAccess(union U u) {
+ u.b = true;
+ u.b;
+ u.i = 1;
+ u.i;
+ u.f = 0.1F;
+ u.f;
+ u.d = 0.1;
+ u.d;
+}
+// CIR: cir.func {{.*}}shouldGenerateUnionAccess
+// CIR: %[[#BASE:]] = cir.get_member %0[0] {name = "b"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.bool>
+// CIR: cir.store %{{.+}}, %[[#BASE]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR: cir.get_member %0[0] {name = "b"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.bool>
+// CIR: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr<!rec_U> -> !cir.ptr<!s32i>
+// CIR: cir.store %{{.+}}, %[[#BASE]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr<!rec_U> -> !cir.ptr<!s32i>
+// CIR: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.float>
+// CIR: cir.store %{{.+}}, %[[#BASE]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.float>
+// CIR: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.double>
+// CIR: cir.store %{{.+}}, %[[#BASE]] : !cir.double, !cir.ptr<!cir.double>
+// CIR: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.double>
+
+// LLVM: define {{.*}}shouldGenerateUnionAccess
+// LLVM: %[[BASE:.*]] = alloca %union.U
+// LLVM: store %union.U %{{.*}}, ptr %[[BASE]]
+// LLVM: store i8 1, ptr %[[BASE]]
+// LLVM: store i32 1, ptr %[[BASE]]
+// LLVM: store float 0x3FB99999A0000000, ptr %[[BASE]]
+// LLVM: store double 1.000000e-01, ptr %[[BASE]]
+
+// OGCG: define {{.*}}shouldGenerateUnionAccess
+// OGCG: %[[BASE:.*]] = alloca %union.U
+// OGCG: %[[DIVE:.*]] = getelementptr inbounds nuw %union.U, ptr %[[BASE]], i32 0, i32 0
+// OGCG: store i64 %{{.*}}, ptr %[[DIVE]]
+// OGCG: store i8 1, ptr %[[BASE]]
+// OGCG: store i32 1, ptr %[[BASE]]
+// OGCG: store float 0x3FB99999A0000000, ptr %[[BASE]]
+// OGCG: store double 1.000000e-01, ptr %[[BASE]]
>From 52e5889d0eeecec27beb4332c5d95d33bf3621d8 Mon Sep 17 00:00:00 2001
From: "Mikhail R. Gadelha" <mikhail at igalia.com>
Date: Wed, 7 May 2025 13:22:09 -0300
Subject: [PATCH 003/115] [libc] Enable exp10m1f on RISC-V (#138768)
Previously, the test failed due to isnan() and isinf() not being
defined.
This patch follows other tests in the same directory and uses
is_inf_or_nan() from the FPBits class instead.
---------
Co-authored-by: OverMighty <its.overmighty at gmail.com>
---
libc/config/linux/riscv/entrypoints.txt | 2 +-
libc/test/src/math/exp10m1f_test.cpp | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index d2fc128b35a3c..30d9d00dfefc9 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -451,7 +451,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.exp
libc.src.math.exp10
libc.src.math.exp10f
- # libc.src.math.exp10m1f
+ libc.src.math.exp10m1f
libc.src.math.exp2
libc.src.math.exp2f
libc.src.math.exp2m1f
diff --git a/libc/test/src/math/exp10m1f_test.cpp b/libc/test/src/math/exp10m1f_test.cpp
index cc960321175cb..aee273384f1a2 100644
--- a/libc/test/src/math/exp10m1f_test.cpp
+++ b/libc/test/src/math/exp10m1f_test.cpp
@@ -80,7 +80,7 @@ TEST_F(LlvmLibcExp10m1fTest, InFloatRange) {
constexpr uint32_t STEP = UINT32_MAX / COUNT;
for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
float x = FPBits(v).get_val();
- if (isnan(x) || isinf(x))
+ if (FPBits(v).is_inf_or_nan())
continue;
LIBC_NAMESPACE::libc_errno = 0;
float result = LIBC_NAMESPACE::exp10m1f(x);
@@ -89,7 +89,7 @@ TEST_F(LlvmLibcExp10m1fTest, InFloatRange) {
// in the single-precision floating point range, then ignore comparing with
// MPFR result as MPFR can still produce valid results because of its
// wider precision.
- if (isnan(result) || isinf(result) || LIBC_NAMESPACE::libc_errno != 0)
+ if (FPBits(result).is_inf_or_nan() || LIBC_NAMESPACE::libc_errno != 0)
continue;
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp10m1, x,
LIBC_NAMESPACE::exp10m1f(x), 0.5);
>From ce69a60bc21024706a90fb36ffc2b43e112fb002 Mon Sep 17 00:00:00 2001
From: Zhen Wang <37195552+wangzpgi at users.noreply.github.com>
Date: Wed, 7 May 2025 09:23:43 -0700
Subject: [PATCH 004/115] Skip contiguous check when ignore_tkr(c) is used
(#138762)
The point of ignore_tkr(c) is to ignore both contiguity warnings and
errors for arguments of all attribute types.
---
flang/lib/Semantics/check-call.cpp | 3 ++-
flang/test/Semantics/cuf20.cuf | 42 ++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+), 1 deletion(-)
create mode 100644 flang/test/Semantics/cuf20.cuf
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index dfaa0e028d698..11928860fea5f 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -1016,7 +1016,8 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy,
}
}
if (dummyDataAttr == common::CUDADataAttr::Device &&
- (dummyIsAssumedShape || dummyIsAssumedRank)) {
+ (dummyIsAssumedShape || dummyIsAssumedRank) &&
+ !dummy.ignoreTKR.test(common::IgnoreTKR::Contiguous)) {
if (auto contig{evaluate::IsContiguous(actual, foldingContext,
/*namedConstantSectionsAreContiguous=*/true,
/*firstDimensionStride1=*/true)}) {
diff --git a/flang/test/Semantics/cuf20.cuf b/flang/test/Semantics/cuf20.cuf
new file mode 100644
index 0000000000000..222ff2a1b7c6d
--- /dev/null
+++ b/flang/test/Semantics/cuf20.cuf
@@ -0,0 +1,42 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+! Test case 1: Device arrays with ignore_tkr(c)
+subroutine test_device_arrays()
+ interface bar
+ subroutine bar1(a)
+!dir$ ignore_tkr(c) a
+ real :: a(..)
+!@cuf attributes(device) :: a
+ end subroutine
+ end interface
+
+ integer :: n = 10, k = 2
+ real, device :: a(10), b(10), c(10)
+
+ call bar(a(1:n)) ! Should not warn about contiguity
+ call bar(b(1:n:k)) ! Should not warn about contiguity
+ call bar(c(1:n:2)) ! Should not warn about contiguity
+end subroutine
+
+! Test case 2: Managed arrays with ignore_tkr(c)
+subroutine test_managed_arrays()
+ interface bar
+ subroutine bar1(a)
+!dir$ ignore_tkr(c) a
+ real :: a(..)
+!@cuf attributes(device) :: a
+ end subroutine
+ end interface
+
+ integer :: n = 10, k = 2
+ real, managed :: a(10), b(10), c(10)
+
+ call bar(a(1:n)) ! Should not warn about contiguity
+ call bar(b(1:n:k)) ! Should not warn about contiguity
+ call bar(c(1:n:2)) ! Should not warn about contiguity
+end subroutine
+
+program main
+ call test_device_arrays()
+ call test_managed_arrays()
+end program
\ No newline at end of file
>From dbcfc43fa9bc5c6670d432341629e01cd84b1dab Mon Sep 17 00:00:00 2001
From: Prabhu Rajasekaran <prabhukr at google.com>
Date: Wed, 7 May 2025 09:30:49 -0700
Subject: [PATCH 005/115] [llvm][AsmPrinter] CodeView for UEFI (#138359)
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++-
llvm/test/DebugInfo/COFF/asm.ll | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d7710212e8cc3..eb076960a5def 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -561,7 +561,8 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = M.getCodeViewFlag();
- if (EmitCodeView && TM.getTargetTriple().isOSWindows())
+ if (EmitCodeView &&
+ (TM.getTargetTriple().isOSWindows() || TM.getTargetTriple().isUEFI()))
Handlers.push_back(std::make_unique<CodeViewDebug>(this));
if (!EmitCodeView || M.getDwarfVersion()) {
if (hasDebugInfo()) {
diff --git a/llvm/test/DebugInfo/COFF/asm.ll b/llvm/test/DebugInfo/COFF/asm.ll
index d873df26bf718..cf440bd75fa34 100644
--- a/llvm/test/DebugInfo/COFF/asm.ll
+++ b/llvm/test/DebugInfo/COFF/asm.ll
@@ -2,6 +2,8 @@
; RUN: llc -mcpu=core2 -mtriple=i686-pc-win32 -o - -O0 < %s | llvm-mc -triple=i686-pc-win32 -filetype=obj | llvm-readobj -S --sr --codeview - | FileCheck --check-prefix=OBJ32 %s
; RUN: llc -mcpu=core2 -mtriple=x86_64-pc-win32 -O0 < %s | FileCheck --check-prefix=X64 %s
; RUN: llc -mcpu=core2 -mtriple=x86_64-pc-win32 -o - -O0 < %s | llvm-mc -triple=x86_64-pc-win32 -filetype=obj | llvm-readobj -S --sr --codeview - | FileCheck --check-prefix=OBJ64 %s
+; RUN: llc -mcpu=core2 -mtriple=x86_64-uefi -O0 < %s | FileCheck --check-prefix=X64 %s
+; RUN: llc -mcpu=core2 -mtriple=x86_64-uefi -o - -O0 < %s | llvm-mc -triple=x86_64-pc-win32 -filetype=obj | llvm-readobj -S --sr --codeview - | FileCheck --check-prefix=OBJ64 %s
; This LL file was generated by running clang on the following code:
; D:\asm.c:
>From f9783c559f16991c19924898357edb9240a5f0aa Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Thu, 8 May 2025 00:37:46 +0800
Subject: [PATCH 006/115] [InstCombine] Fix `frexp(frexp(x)) -> frexp(x)` fold
(#138837)
Fixes #138819
When frexp is applied twice, the second result (the returned exponent)
should be zero, since the mantissa produced by the first call is already
normalized.
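A quick standalone illustration with std::frexp, which has the same
mantissa/exponent contract as the llvm.frexp intrinsic being folded here:

#include <cmath>
#include <cstdio>

int main() {
  int e1 = 0, e2 = 0;
  double m1 = std::frexp(12.0, &e1); // m1 = 0.75, e1 = 4 (12 = 0.75 * 2^4)
  double m2 = std::frexp(m1, &e2);   // m2 = 0.75, e2 = 0 (already normalized)
  std::printf("m1=%g e1=%d m2=%g e2=%d\n", m1, e1, m2, e2);
  return 0;
}

Reusing the entire first result (as the removed InstSimplify fold did) keeps
the original exponent, which is the bug this patch fixes.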
---
llvm/lib/Analysis/InstructionSimplify.cpp | 9 ---------
.../InstCombine/InstCombineCalls.cpp | 15 +++++++++++++++
.../test/CodeGen/AMDGPU/frexp-constant-fold.ll | 18 +++++++++---------
.../{InstSimplify => InstCombine}/frexp.ll | 11 +++++++----
4 files changed, 31 insertions(+), 22 deletions(-)
rename llvm/test/Transforms/{InstSimplify => InstCombine}/frexp.ll (96%)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5a2943de9066e..85e3be9cc45c3 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6377,15 +6377,6 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
if (isSplatValue(Op0))
return Op0;
break;
- case Intrinsic::frexp: {
- // Frexp is idempotent with the added complication of the struct return.
- if (match(Op0, m_ExtractValue<0>(m_Value(X)))) {
- if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value())))
- return X;
- }
-
- break;
- }
default:
break;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 4716eeb60f0cd..3d35bf753c40e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3811,6 +3811,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::frexp: {
+ Value *X;
+ // The first result is idempotent with the added complication of the struct
+ // return, and the second result is zero because the value is already
+ // normalized.
+ if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
+ if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value()))) {
+ X = Builder.CreateInsertValue(
+ X, Constant::getNullValue(II->getType()->getStructElementType(1)),
+ 1);
+ return replaceInstUsesWith(*II, X);
+ }
+ }
+ break;
+ }
default: {
// Handle target specific intrinsics
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
diff --git a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
index daa304ef641a2..2e75b90c00968 100644
--- a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
@@ -10,9 +10,9 @@ define { float, i32 } @frexp_frexp(float %x) {
; CHECK-LABEL: frexp_frexp:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_frexp_mant_f32_e32 v2, v0
-; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v1, v0
-; CHECK-NEXT: v_mov_b32_e32 v0, v2
+; CHECK-NEXT: v_frexp_mant_f32_e32 v1, v0
+; CHECK-NEXT: v_frexp_mant_f32_e32 v0, v1
+; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; CHECK-NEXT: s_setpc_b64 s[30:31]
%frexp0 = call { float, i32 } @llvm.frexp.f32.i32(float %x)
%frexp0.0 = extractvalue { float, i32 } %frexp0, 0
@@ -24,12 +24,12 @@ define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) {
; CHECK-LABEL: frexp_frexp_vector:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_frexp_mant_f32_e32 v4, v0
-; CHECK-NEXT: v_frexp_mant_f32_e32 v5, v1
-; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v2, v0
-; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v3, v1
-; CHECK-NEXT: v_mov_b32_e32 v0, v4
-; CHECK-NEXT: v_mov_b32_e32 v1, v5
+; CHECK-NEXT: v_frexp_mant_f32_e32 v3, v1
+; CHECK-NEXT: v_frexp_mant_f32_e32 v2, v0
+; CHECK-NEXT: v_frexp_mant_f32_e32 v0, v2
+; CHECK-NEXT: v_frexp_mant_f32_e32 v1, v3
+; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v2, v2
+; CHECK-NEXT: v_frexp_exp_i32_f32_e32 v3, v3
; CHECK-NEXT: s_setpc_b64 s[30:31]
%frexp0 = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %x)
%frexp0.0 = extractvalue { <2 x float>, <2 x i32> } %frexp0, 0
diff --git a/llvm/test/Transforms/InstSimplify/frexp.ll b/llvm/test/Transforms/InstCombine/frexp.ll
similarity index 96%
rename from llvm/test/Transforms/InstSimplify/frexp.ll
rename to llvm/test/Transforms/InstCombine/frexp.ll
index 34cfce92bac43..6541f0d77a093 100644
--- a/llvm/test/Transforms/InstSimplify/frexp.ll
+++ b/llvm/test/Transforms/InstCombine/frexp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -S -passes=instsimplify %s | FileCheck %s
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
declare { float, i32 } @llvm.frexp.f32.i32(float)
declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>)
@@ -12,7 +12,8 @@ define { float, i32 } @frexp_frexp(float %x) {
; CHECK-LABEL: define { float, i32 } @frexp_frexp(
; CHECK-SAME: float [[X:%.*]]) {
; CHECK-NEXT: [[FREXP0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
-; CHECK-NEXT: ret { float, i32 } [[FREXP0]]
+; CHECK-NEXT: [[FREXP1:%.*]] = insertvalue { float, i32 } [[FREXP0]], i32 0, 1
+; CHECK-NEXT: ret { float, i32 } [[FREXP1]]
;
%frexp0 = call { float, i32 } @llvm.frexp.f32.i32(float %x)
%frexp0.0 = extractvalue { float, i32 } %frexp0, 0
@@ -24,7 +25,8 @@ define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) {
; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector(
; CHECK-SAME: <2 x float> [[X:%.*]]) {
; CHECK-NEXT: [[FREXP0:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[X]])
-; CHECK-NEXT: ret { <2 x float>, <2 x i32> } [[FREXP0]]
+; CHECK-NEXT: [[FREXP1:%.*]] = insertvalue { <2 x float>, <2 x i32> } [[FREXP0]], <2 x i32> zeroinitializer, 1
+; CHECK-NEXT: ret { <2 x float>, <2 x i32> } [[FREXP1]]
;
%frexp0 = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %x)
%frexp0.0 = extractvalue { <2 x float>, <2 x i32> } %frexp0, 0
@@ -47,7 +49,8 @@ define { <vscale x 2 x float>, <vscale x 2 x i32> } @frexp_frexp_scalable_vector
; CHECK-LABEL: define { <vscale x 2 x float>, <vscale x 2 x i32> } @frexp_frexp_scalable_vector(
; CHECK-SAME: <vscale x 2 x float> [[X:%.*]]) {
; CHECK-NEXT: [[FREXP0:%.*]] = call { <vscale x 2 x float>, <vscale x 2 x i32> } @llvm.frexp.nxv2f32.nxv2i32(<vscale x 2 x float> [[X]])
-; CHECK-NEXT: ret { <vscale x 2 x float>, <vscale x 2 x i32> } [[FREXP0]]
+; CHECK-NEXT: [[FREXP1:%.*]] = insertvalue { <vscale x 2 x float>, <vscale x 2 x i32> } [[FREXP0]], <vscale x 2 x i32> zeroinitializer, 1
+; CHECK-NEXT: ret { <vscale x 2 x float>, <vscale x 2 x i32> } [[FREXP1]]
;
%frexp0 = call { <vscale x 2 x float>, <vscale x 2 x i32> } @llvm.frexp.nxv2f32.nxv2i32(<vscale x 2 x float> %x)
%frexp0.0 = extractvalue { <vscale x 2 x float>, <vscale x 2 x i32> } %frexp0, 0
>From 3cb480b1bd8c3a368c33e180483178309c9ca753 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Wed, 7 May 2025 12:39:41 -0400
Subject: [PATCH 007/115] [C++20] Fix crash with invalid concept requirement
(#138877)
We were previously recovering a bit too hard; consumeClose() would skip
to a recovery point, then we would call skipToEnd() to skip to another
recovery point. Needless to say, the follow-on diagnostics were not
great. But also, follow-on diagnostics were crashing due to unexpected
null constraint expressions.
Now we only recover once.
Fixes #138820
---
clang/docs/ReleaseNotes.rst | 2 ++
clang/lib/Parse/ParseExprCXX.cpp | 4 +++-
clang/test/SemaCXX/concept-crash-on-diagnostic.cpp | 12 ++++++++++++
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 55f774f5a672e..350244e3054cf 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -660,6 +660,8 @@ Bug Fixes to C++ Support
- Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852)
- No longer crashes when instantiating invalid variable template specialization
whose type depends on itself. (#GH51347), (#GH55872)
+- Improved parser recovery of invalid requirement expressions. In turn, this
+ fixes crashes from follow-on processing of the invalid requirement. (#GH138820)
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 32b08a12a3bb6..546c228a30513 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -3706,8 +3706,10 @@ ExprResult Parser::ParseRequiresExpression() {
SkipUntil(tok::semi, tok::r_brace, SkipUntilFlags::StopBeforeMatch);
break;
}
+ // If there's an error consuming the closing bracket, consumeClose()
+ // will handle skipping to the nearest recovery point for us.
if (ExprBraces.consumeClose())
- ExprBraces.skipToEnd();
+ break;
concepts::Requirement *Req = nullptr;
SourceLocation NoexceptLoc;
diff --git a/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp
index c38f8888075de..1efed72522fef 100644
--- a/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp
+++ b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp
@@ -48,3 +48,15 @@ concept is_foo_concept = __is_same(foo::bar, T);
// expected-error at -1 {{'bar' is a private member of 'GH131530::foo'}}
}
+
+namespace GH138820 {
+int a;
+template<typename T>
+concept atomicish = requires() {
+ { // expected-note {{to match this '{'}}
+ a
+ ... // expected-error {{expected '}'}}
+ };
+};
+atomicish<int> f(); // expected-error {{expected 'auto' or 'decltype(auto)' after concept name}}
+} // namespace GH138820
>From 2eb6545b3ecb567a85d9114dab69a1455c7a032c Mon Sep 17 00:00:00 2001
From: Morris Hafner <mmha at users.noreply.github.com>
Date: Wed, 7 May 2025 18:50:39 +0200
Subject: [PATCH 008/115] [CIR] Add cir-simplify pass (#138317)
This patch adds the cir-simplify pass for SelectOp and TernaryOp. It
also adds a folder for SelectOp and a constant materializer for the CIR
dialect.
---
clang/include/clang/CIR/CIRToCIRPasses.h | 3 +-
.../clang/CIR/Dialect/IR/CIRDialect.td | 7 +
clang/include/clang/CIR/Dialect/IR/CIROps.td | 2 +
clang/include/clang/CIR/Dialect/Passes.h | 1 +
clang/include/clang/CIR/Dialect/Passes.td | 19 ++
clang/include/clang/CIR/MissingFeatures.h | 1 -
clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 30 +++
.../Dialect/Transforms/CIRCanonicalize.cpp | 3 +-
.../CIR/Dialect/Transforms/CIRSimplify.cpp | 202 ++++++++++++++++++
.../lib/CIR/Dialect/Transforms/CMakeLists.txt | 1 +
clang/lib/CIR/FrontendAction/CIRGenAction.cpp | 12 +-
clang/lib/CIR/Lowering/CIRPasses.cpp | 6 +-
clang/test/CIR/Transforms/select.cir | 60 ++++++
clang/test/CIR/Transforms/ternary-fold.cir | 76 +++++++
clang/tools/cir-opt/cir-opt.cpp | 3 +
15 files changed, 416 insertions(+), 10 deletions(-)
create mode 100644 clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp
create mode 100644 clang/test/CIR/Transforms/select.cir
create mode 100644 clang/test/CIR/Transforms/ternary-fold.cir
diff --git a/clang/include/clang/CIR/CIRToCIRPasses.h b/clang/include/clang/CIR/CIRToCIRPasses.h
index 361ebb9e9b840..4a23790ee8b76 100644
--- a/clang/include/clang/CIR/CIRToCIRPasses.h
+++ b/clang/include/clang/CIR/CIRToCIRPasses.h
@@ -32,7 +32,8 @@ namespace cir {
mlir::LogicalResult runCIRToCIRPasses(mlir::ModuleOp theModule,
mlir::MLIRContext &mlirCtx,
clang::ASTContext &astCtx,
- bool enableVerifier);
+ bool enableVerifier,
+ bool enableCIRSimplify);
} // namespace cir
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
index 73759cfa9c3c9..52e32eedf774d 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td
@@ -27,6 +27,13 @@ def CIR_Dialect : Dialect {
let useDefaultAttributePrinterParser = 0;
let useDefaultTypePrinterParser = 0;
+ // Enable constant materialization for the CIR dialect. This generates a
+ // declaration for the cir::CIRDialect::materializeConstant function. This
+ // hook is necessary for canonicalization to properly handle attributes
+ // returned by fold methods, allowing them to be materialized as constant
+ // operations in the IR.
+ let hasConstantMaterializer = 1;
+
let extraClassDeclaration = [{
static llvm::StringRef getTripleAttrName() { return "cir.triple"; }
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 422c89c4f9391..8d01db03cb3fa 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -1464,6 +1464,8 @@ def SelectOp : CIR_Op<"select", [Pure,
qualified(type($false_value))
`)` `->` qualified(type($result)) attr-dict
}];
+
+ let hasFolder = 1;
}
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/Dialect/Passes.h b/clang/include/clang/CIR/Dialect/Passes.h
index 133eb462dcf1f..dbecf81acf7bb 100644
--- a/clang/include/clang/CIR/Dialect/Passes.h
+++ b/clang/include/clang/CIR/Dialect/Passes.h
@@ -22,6 +22,7 @@ namespace mlir {
std::unique_ptr<Pass> createCIRCanonicalizePass();
std::unique_ptr<Pass> createCIRFlattenCFGPass();
+std::unique_ptr<Pass> createCIRSimplifyPass();
std::unique_ptr<Pass> createHoistAllocasPass();
void populateCIRPreLoweringPasses(mlir::OpPassManager &pm);
diff --git a/clang/include/clang/CIR/Dialect/Passes.td b/clang/include/clang/CIR/Dialect/Passes.td
index 74c255861c879..de775e69f0073 100644
--- a/clang/include/clang/CIR/Dialect/Passes.td
+++ b/clang/include/clang/CIR/Dialect/Passes.td
@@ -29,6 +29,25 @@ def CIRCanonicalize : Pass<"cir-canonicalize"> {
let dependentDialects = ["cir::CIRDialect"];
}
+def CIRSimplify : Pass<"cir-simplify"> {
+ let summary = "Performs CIR simplification and code optimization";
+ let description = [{
+ The pass performs semantics-preserving code simplifications and optimizations
+ on CIR while maintaining strict program correctness.
+
+ Unlike the `cir-canonicalize` pass, these transformations may reduce the IR's
+ structural similarity to the original source code as a trade-off for improved
+ code quality. This can affect debugging fidelity by altering intermediate
+ representations of folded expressions, hoisted operations, and other
+ optimized constructs.
+
+ Example transformations include ternary expression folding and code hoisting
+ while preserving program semantics.
+ }];
+ let constructor = "mlir::createCIRSimplifyPass()";
+ let dependentDialects = ["cir::CIRDialect"];
+}
+
def HoistAllocas : Pass<"cir-hoist-allocas"> {
let summary = "Hoist allocas to the entry of the function";
let description = [{
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index eb75a073d1817..06636cd6c554c 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -206,7 +206,6 @@ struct MissingFeatures {
static bool labelOp() { return false; }
static bool ptrDiffOp() { return false; }
static bool ptrStrideOp() { return false; }
- static bool selectOp() { return false; }
static bool switchOp() { return false; }
static bool ternaryOp() { return false; }
static bool tryOp() { return false; }
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 6b144149b41c9..b131edaf403ed 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -79,6 +79,14 @@ void cir::CIRDialect::initialize() {
addInterfaces<CIROpAsmDialectInterface>();
}
+Operation *cir::CIRDialect::materializeConstant(mlir::OpBuilder &builder,
+ mlir::Attribute value,
+ mlir::Type type,
+ mlir::Location loc) {
+ return builder.create<cir::ConstantOp>(loc, type,
+ mlir::cast<mlir::TypedAttr>(value));
+}
+
//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//
@@ -1261,6 +1269,28 @@ void cir::TernaryOp::build(
result.addTypes(TypeRange{yield.getOperandTypes().front()});
}
+//===----------------------------------------------------------------------===//
+// SelectOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult cir::SelectOp::fold(FoldAdaptor adaptor) {
+ mlir::Attribute condition = adaptor.getCondition();
+ if (condition) {
+ bool conditionValue = mlir::cast<cir::BoolAttr>(condition).getValue();
+ return conditionValue ? getTrueValue() : getFalseValue();
+ }
+
+ // cir.select if %0 then x else x -> x
+ mlir::Attribute trueValue = adaptor.getTrueValue();
+ mlir::Attribute falseValue = adaptor.getFalseValue();
+ if (trueValue == falseValue)
+ return trueValue;
+ if (getTrueValue() == getFalseValue())
+ return getTrueValue();
+
+ return {};
+}
+
//===----------------------------------------------------------------------===//
// ShiftOp
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
index cdac69e66dba3..3b4c7bc613133 100644
--- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
@@ -121,14 +121,13 @@ void CIRCanonicalizePass::runOnOperation() {
getOperation()->walk([&](Operation *op) {
assert(!cir::MissingFeatures::switchOp());
assert(!cir::MissingFeatures::tryOp());
- assert(!cir::MissingFeatures::selectOp());
assert(!cir::MissingFeatures::complexCreateOp());
assert(!cir::MissingFeatures::complexRealOp());
assert(!cir::MissingFeatures::complexImagOp());
assert(!cir::MissingFeatures::callOp());
// CastOp and UnaryOp are here to perform a manual `fold` in
// applyOpPatternsGreedily.
- if (isa<BrOp, BrCondOp, ScopeOp, CastOp, UnaryOp>(op))
+ if (isa<BrOp, BrCondOp, CastOp, ScopeOp, SelectOp, UnaryOp>(op))
ops.push_back(op);
});
diff --git a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp
new file mode 100644
index 0000000000000..b969569b0081c
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp
@@ -0,0 +1,202 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Region.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/Passes.h"
+#include "llvm/ADT/SmallVector.h"
+
+using namespace mlir;
+using namespace cir;
+
+//===----------------------------------------------------------------------===//
+// Rewrite patterns
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Simplify suitable ternary operations into select operations.
+///
+/// For now we only simplify those ternary operations whose true and false
+/// branches directly yield a value or a constant. That is, both of the true and
+/// the false branch must either contain a cir.yield operation as the only
+/// operation in the branch, or contain a cir.const operation followed by a
+/// cir.yield operation that yields the constant value.
+///
+/// For example, we will simplify the following ternary operation:
+///
+/// %0 = ...
+/// %1 = cir.ternary (%condition, true {
+/// %2 = cir.const ...
+/// cir.yield %2
+/// } false {
+/// cir.yield %0
+///
+/// into the following sequence of operations:
+///
+/// %1 = cir.const ...
+/// %0 = cir.select if %condition then %1 else %2
+struct SimplifyTernary final : public OpRewritePattern<TernaryOp> {
+ using OpRewritePattern<TernaryOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(TernaryOp op,
+ PatternRewriter &rewriter) const override {
+ if (op->getNumResults() != 1)
+ return mlir::failure();
+
+ if (!isSimpleTernaryBranch(op.getTrueRegion()) ||
+ !isSimpleTernaryBranch(op.getFalseRegion()))
+ return mlir::failure();
+
+ cir::YieldOp trueBranchYieldOp =
+ mlir::cast<cir::YieldOp>(op.getTrueRegion().front().getTerminator());
+ cir::YieldOp falseBranchYieldOp =
+ mlir::cast<cir::YieldOp>(op.getFalseRegion().front().getTerminator());
+ mlir::Value trueValue = trueBranchYieldOp.getArgs()[0];
+ mlir::Value falseValue = falseBranchYieldOp.getArgs()[0];
+
+ rewriter.inlineBlockBefore(&op.getTrueRegion().front(), op);
+ rewriter.inlineBlockBefore(&op.getFalseRegion().front(), op);
+ rewriter.eraseOp(trueBranchYieldOp);
+ rewriter.eraseOp(falseBranchYieldOp);
+ rewriter.replaceOpWithNewOp<cir::SelectOp>(op, op.getCond(), trueValue,
+ falseValue);
+
+ return mlir::success();
+ }
+
+private:
+ bool isSimpleTernaryBranch(mlir::Region &region) const {
+ if (!region.hasOneBlock())
+ return false;
+
+ mlir::Block &onlyBlock = region.front();
+ mlir::Block::OpListType &ops = onlyBlock.getOperations();
+
+ // The region/block could only contain at most 2 operations.
+ if (ops.size() > 2)
+ return false;
+
+ if (ops.size() == 1) {
+ // The region/block only contain a cir.yield operation.
+ return true;
+ }
+
+ // Check whether the region/block contains a cir.const followed by a
+ // cir.yield that yields the value.
+ auto yieldOp = mlir::cast<cir::YieldOp>(onlyBlock.getTerminator());
+ auto yieldValueDefOp = mlir::dyn_cast_if_present<cir::ConstantOp>(
+ yieldOp.getArgs()[0].getDefiningOp());
+ return yieldValueDefOp && yieldValueDefOp->getBlock() == &onlyBlock;
+ }
+};
+
+/// Simplify select operations with boolean constants into simpler forms.
+///
+/// This pattern simplifies select operations where both true and false values
+/// are boolean constants. Two specific cases are handled:
+///
+/// 1. When selecting between true and false based on a condition,
+/// the operation simplifies to just the condition itself:
+///
+/// %0 = cir.select if %condition then true else false
+/// ->
+/// (replaced with %condition directly)
+///
+/// 2. When selecting between false and true based on a condition,
+/// the operation simplifies to the logical negation of the condition:
+///
+/// %0 = cir.select if %condition then false else true
+/// ->
+/// %0 = cir.unary not %condition
+struct SimplifySelect : public OpRewritePattern<SelectOp> {
+ using OpRewritePattern<SelectOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(SelectOp op,
+ PatternRewriter &rewriter) const final {
+ mlir::Operation *trueValueOp = op.getTrueValue().getDefiningOp();
+ mlir::Operation *falseValueOp = op.getFalseValue().getDefiningOp();
+ auto trueValueConstOp =
+ mlir::dyn_cast_if_present<cir::ConstantOp>(trueValueOp);
+ auto falseValueConstOp =
+ mlir::dyn_cast_if_present<cir::ConstantOp>(falseValueOp);
+ if (!trueValueConstOp || !falseValueConstOp)
+ return mlir::failure();
+
+ auto trueValue = mlir::dyn_cast<cir::BoolAttr>(trueValueConstOp.getValue());
+ auto falseValue =
+ mlir::dyn_cast<cir::BoolAttr>(falseValueConstOp.getValue());
+ if (!trueValue || !falseValue)
+ return mlir::failure();
+
+ // cir.select if %0 then #true else #false -> %0
+ if (trueValue.getValue() && !falseValue.getValue()) {
+ rewriter.replaceAllUsesWith(op, op.getCondition());
+ rewriter.eraseOp(op);
+ return mlir::success();
+ }
+
+ // cir.select if %0 then #false else #true -> cir.unary not %0
+ if (!trueValue.getValue() && falseValue.getValue()) {
+ rewriter.replaceOpWithNewOp<cir::UnaryOp>(op, cir::UnaryOpKind::Not,
+ op.getCondition());
+ return mlir::success();
+ }
+
+ return mlir::failure();
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// CIRSimplifyPass
+//===----------------------------------------------------------------------===//
+
+struct CIRSimplifyPass : public CIRSimplifyBase<CIRSimplifyPass> {
+ using CIRSimplifyBase::CIRSimplifyBase;
+
+ void runOnOperation() override;
+};
+
+void populateMergeCleanupPatterns(RewritePatternSet &patterns) {
+ // clang-format off
+ patterns.add<
+ SimplifyTernary,
+ SimplifySelect
+ >(patterns.getContext());
+ // clang-format on
+}
+
+void CIRSimplifyPass::runOnOperation() {
+ // Collect rewrite patterns.
+ RewritePatternSet patterns(&getContext());
+ populateMergeCleanupPatterns(patterns);
+
+ // Collect operations to apply patterns.
+ llvm::SmallVector<Operation *, 16> ops;
+ getOperation()->walk([&](Operation *op) {
+ if (isa<TernaryOp, SelectOp>(op))
+ ops.push_back(op);
+ });
+
+ // Apply patterns.
+ if (applyOpPatternsGreedily(ops, std::move(patterns)).failed())
+ signalPassFailure();
+}
+
+} // namespace
+
+std::unique_ptr<Pass> mlir::createCIRSimplifyPass() {
+ return std::make_unique<CIRSimplifyPass>();
+}
diff --git a/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt
index 4678435b54c79..4dece5b57e450 100644
--- a/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt
+++ b/clang/lib/CIR/Dialect/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
add_clang_library(MLIRCIRTransforms
CIRCanonicalize.cpp
+ CIRSimplify.cpp
FlattenCFG.cpp
HoistAllocas.cpp
diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
index a32e6a7584774..cc65c93f5f16b 100644
--- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
+++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp
@@ -62,15 +62,16 @@ class CIRGenConsumer : public clang::ASTConsumer {
IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;
std::unique_ptr<CIRGenerator> Gen;
const FrontendOptions &FEOptions;
+ CodeGenOptions &CGO;
public:
CIRGenConsumer(CIRGenAction::OutputType Action, CompilerInstance &CI,
- std::unique_ptr<raw_pwrite_stream> OS)
+ CodeGenOptions &CGO, std::unique_ptr<raw_pwrite_stream> OS)
: Action(Action), CI(CI), OutputStream(std::move(OS)),
FS(&CI.getVirtualFileSystem()),
Gen(std::make_unique<CIRGenerator>(CI.getDiagnostics(), std::move(FS),
CI.getCodeGenOpts())),
- FEOptions(CI.getFrontendOpts()) {}
+ FEOptions(CI.getFrontendOpts()), CGO(CGO) {}
void Initialize(ASTContext &Ctx) override {
assert(!Context && "initialized multiple times");
@@ -102,7 +103,8 @@ class CIRGenConsumer : public clang::ASTConsumer {
if (!FEOptions.ClangIRDisablePasses) {
// Setup and run CIR pipeline.
if (runCIRToCIRPasses(MlirModule, MlirCtx, C,
- !FEOptions.ClangIRDisableCIRVerifier)
+ !FEOptions.ClangIRDisableCIRVerifier,
+ CGO.OptimizationLevel > 0)
.failed()) {
CI.getDiagnostics().Report(diag::err_cir_to_cir_transform_failed);
return;
@@ -168,8 +170,8 @@ CIRGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
if (!Out)
Out = getOutputStream(CI, InFile, Action);
- auto Result =
- std::make_unique<cir::CIRGenConsumer>(Action, CI, std::move(Out));
+ auto Result = std::make_unique<cir::CIRGenConsumer>(
+ Action, CI, CI.getCodeGenOpts(), std::move(Out));
return Result;
}
diff --git a/clang/lib/CIR/Lowering/CIRPasses.cpp b/clang/lib/CIR/Lowering/CIRPasses.cpp
index a37a0480a56ac..7a581939580a9 100644
--- a/clang/lib/CIR/Lowering/CIRPasses.cpp
+++ b/clang/lib/CIR/Lowering/CIRPasses.cpp
@@ -20,13 +20,17 @@ namespace cir {
mlir::LogicalResult runCIRToCIRPasses(mlir::ModuleOp theModule,
mlir::MLIRContext &mlirContext,
clang::ASTContext &astContext,
- bool enableVerifier) {
+ bool enableVerifier,
+ bool enableCIRSimplify) {
llvm::TimeTraceScope scope("CIR To CIR Passes");
mlir::PassManager pm(&mlirContext);
pm.addPass(mlir::createCIRCanonicalizePass());
+ if (enableCIRSimplify)
+ pm.addPass(mlir::createCIRSimplifyPass());
+
pm.enableVerifier(enableVerifier);
(void)mlir::applyPassManagerCLOptions(pm);
return pm.run(theModule);
diff --git a/clang/test/CIR/Transforms/select.cir b/clang/test/CIR/Transforms/select.cir
new file mode 100644
index 0000000000000..29a5d1ed1ddeb
--- /dev/null
+++ b/clang/test/CIR/Transforms/select.cir
@@ -0,0 +1,60 @@
+// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+ cir.func @fold_true(%arg0 : !s32i, %arg1 : !s32i) -> !s32i {
+ %0 = cir.const #cir.bool<true> : !cir.bool
+ %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i
+ cir.return %1 : !s32i
+ }
+
+ // CHECK: cir.func @fold_true(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i {
+ // CHECK-NEXT: cir.return %[[ARG0]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @fold_false(%arg0 : !s32i, %arg1 : !s32i) -> !s32i {
+ %0 = cir.const #cir.bool<false> : !cir.bool
+ %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i
+ cir.return %1 : !s32i
+ }
+
+ // CHECK: cir.func @fold_false(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i {
+ // CHECK-NEXT: cir.return %[[ARG1]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @fold_to_const(%arg0 : !cir.bool) -> !s32i {
+ %0 = cir.const #cir.int<42> : !s32i
+ %1 = cir.select if %arg0 then %0 else %0 : (!cir.bool, !s32i, !s32i) -> !s32i
+ cir.return %1 : !s32i
+ }
+
+ // CHECK: cir.func @fold_to_const(%{{.+}}: !cir.bool) -> !s32i {
+ // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i
+ // CHECK-NEXT: cir.return %[[#A]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @simplify_1(%arg0 : !cir.bool) -> !cir.bool {
+ %0 = cir.const #cir.bool<true> : !cir.bool
+ %1 = cir.const #cir.bool<false> : !cir.bool
+ %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+ cir.return %2 : !cir.bool
+ }
+
+ // CHECK: cir.func @simplify_1(%[[ARG0:.+]]: !cir.bool) -> !cir.bool {
+ // CHECK-NEXT: cir.return %[[ARG0]] : !cir.bool
+ // CHECK-NEXT: }
+
+ cir.func @simplify_2(%arg0 : !cir.bool) -> !cir.bool {
+ %0 = cir.const #cir.bool<false> : !cir.bool
+ %1 = cir.const #cir.bool<true> : !cir.bool
+ %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+ cir.return %2 : !cir.bool
+ }
+
+ // CHECK: cir.func @simplify_2(%[[ARG0:.+]]: !cir.bool) -> !cir.bool {
+ // CHECK-NEXT: %[[#A:]] = cir.unary(not, %[[ARG0]]) : !cir.bool, !cir.bool
+ // CHECK-NEXT: cir.return %[[#A]] : !cir.bool
+ // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Transforms/ternary-fold.cir b/clang/test/CIR/Transforms/ternary-fold.cir
new file mode 100644
index 0000000000000..1192a0ce29424
--- /dev/null
+++ b/clang/test/CIR/Transforms/ternary-fold.cir
@@ -0,0 +1,76 @@
+// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+ cir.func @fold_ternary(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+ %0 = cir.const #cir.bool<false> : !cir.bool
+ %1 = cir.ternary (%0, true {
+ cir.yield %arg0 : !s32i
+ }, false {
+ cir.yield %arg1 : !s32i
+ }) : (!cir.bool) -> !s32i
+ cir.return %1 : !s32i
+ }
+
+ // CHECK: cir.func @fold_ternary(%{{.+}}: !s32i, %[[ARG:.+]]: !s32i) -> !s32i {
+ // CHECK-NEXT: cir.return %[[ARG]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @simplify_ternary(%arg0 : !cir.bool, %arg1 : !s32i) -> !s32i {
+ %0 = cir.ternary (%arg0, true {
+ %1 = cir.const #cir.int<42> : !s32i
+ cir.yield %1 : !s32i
+ }, false {
+ cir.yield %arg1 : !s32i
+ }) : (!cir.bool) -> !s32i
+ cir.return %0 : !s32i
+ }
+
+ // CHECK: cir.func @simplify_ternary(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i {
+ // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<42> : !s32i
+ // CHECK-NEXT: %[[#B:]] = cir.select if %[[ARG0]] then %[[#A]] else %[[ARG1]] : (!cir.bool, !s32i, !s32i) -> !s32i
+ // CHECK-NEXT: cir.return %[[#B]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @simplify_ternary_false_const(%arg0 : !cir.bool, %arg1 : !s32i) -> !s32i {
+ %0 = cir.ternary (%arg0, true {
+ cir.yield %arg1 : !s32i
+ }, false {
+ %1 = cir.const #cir.int<24> : !s32i
+ cir.yield %1 : !s32i
+ }) : (!cir.bool) -> !s32i
+ cir.return %0 : !s32i
+ }
+
+ // CHECK: cir.func @simplify_ternary_false_const(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i {
+ // CHECK-NEXT: %[[#A:]] = cir.const #cir.int<24> : !s32i
+ // CHECK-NEXT: %[[#B:]] = cir.select if %[[ARG0]] then %[[ARG1]] else %[[#A]] : (!cir.bool, !s32i, !s32i) -> !s32i
+ // CHECK-NEXT: cir.return %[[#B]] : !s32i
+ // CHECK-NEXT: }
+
+ cir.func @non_simplifiable_ternary(%arg0 : !cir.bool) -> !s32i {
+ %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+ %1 = cir.ternary (%arg0, true {
+ %2 = cir.const #cir.int<42> : !s32i
+ cir.yield %2 : !s32i
+ }, false {
+ %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+ cir.yield %3 : !s32i
+ }) : (!cir.bool) -> !s32i
+ cir.return %1 : !s32i
+ }
+
+ // CHECK: cir.func @non_simplifiable_ternary(%[[ARG0:.+]]: !cir.bool) -> !s32i {
+ // CHECK-NEXT: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+ // CHECK-NEXT: %[[#B:]] = cir.ternary(%[[ARG0]], true {
+ // CHECK-NEXT: %[[#C:]] = cir.const #cir.int<42> : !s32i
+ // CHECK-NEXT: cir.yield %[[#C]] : !s32i
+ // CHECK-NEXT: }, false {
+ // CHECK-NEXT: %[[#D:]] = cir.load %[[#A]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: cir.yield %[[#D]] : !s32i
+ // CHECK-NEXT: }) : (!cir.bool) -> !s32i
+ // CHECK-NEXT: cir.return %[[#B]] : !s32i
+ // CHECK-NEXT: }
+}
diff --git a/clang/tools/cir-opt/cir-opt.cpp b/clang/tools/cir-opt/cir-opt.cpp
index e50fa70582966..0e20b97feced8 100644
--- a/clang/tools/cir-opt/cir-opt.cpp
+++ b/clang/tools/cir-opt/cir-opt.cpp
@@ -37,6 +37,9 @@ int main(int argc, char **argv) {
::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
return mlir::createCIRCanonicalizePass();
});
+ ::mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> {
+ return mlir::createCIRSimplifyPass();
+ });
mlir::PassPipelineRegistration<CIRToLLVMPipelineOptions> pipeline(
"cir-to-llvm", "",
>From 806a79abd0aac1f0e2ff7c1172ec402cc860a15b Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 09:53:20 -0700
Subject: [PATCH 009/115] [llvm] Drop "const" from "const ArrayRef" (NFC)
(#138818)
---
llvm/include/llvm/MC/MCPseudoProbe.h | 2 +-
llvm/include/llvm/Transforms/IPO/Attributor.h | 3 +--
llvm/lib/Transforms/IPO/Attributor.cpp | 2 +-
llvm/utils/TableGen/Common/CodeGenTarget.h | 2 +-
4 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index fd1f055789544..dc14038a03acb 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -510,7 +510,7 @@ class MCPseudoProbeDecoder {
return iterator_range(It->second);
}
- const ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
+ ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
return InlineTreeVec;
}
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index f82e169da00e3..0ac5e7e3cc368 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1343,8 +1343,7 @@ struct InformationCache {
/// Return all functions that might be called indirectly, only valid for
/// closed world modules (see isClosedWorldModule).
- const ArrayRef<Function *>
- getIndirectlyCallableFunctions(Attributor &A) const;
+ ArrayRef<Function *> getIndirectlyCallableFunctions(Attributor &A) const;
/// Return the flat address space if the associated target has.
std::optional<unsigned> getFlatAddressSpace() const;
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 40881fd3ceac8..e432f0cb7d897 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3285,7 +3285,7 @@ InformationCache::FunctionInfo::~FunctionInfo() {
It.getSecond()->~InstructionVectorTy();
}
-const ArrayRef<Function *>
+ArrayRef<Function *>
InformationCache::getIndirectlyCallableFunctions(Attributor &A) const {
assert(A.isClosedWorldModule() && "Cannot see all indirect callees!");
return IndirectlyCallableFunctions;
diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h
index 682cc4e2bc9c6..da2f3e060591a 100644
--- a/llvm/utils/TableGen/Common/CodeGenTarget.h
+++ b/llvm/utils/TableGen/Common/CodeGenTarget.h
@@ -252,7 +252,7 @@ class ComplexPattern {
const Record *getValueType() const { return Ty; }
unsigned getNumOperands() const { return NumOperands; }
const std::string &getSelectFunc() const { return SelectFunc; }
- const ArrayRef<const Record *> getRootNodes() const { return RootNodes; }
+ ArrayRef<const Record *> getRootNodes() const { return RootNodes; }
bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
unsigned getComplexity() const { return Complexity; }
bool wantsRoot() const { return WantsRoot; }
>From 0c01b316cc5662f02cea979a2d0af4b879b0f0dd Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 09:53:47 -0700
Subject: [PATCH 010/115] [llvm] Drop "const" from "const StringRef" (NFC)
(#138821)
---
llvm/include/llvm/Analysis/DXILResource.h | 2 +-
llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Analysis/DXILResource.h b/llvm/include/llvm/Analysis/DXILResource.h
index 2631c3cb94c8a..3f62981d37acd 100644
--- a/llvm/include/llvm/Analysis/DXILResource.h
+++ b/llvm/include/llvm/Analysis/DXILResource.h
@@ -375,7 +375,7 @@ class ResourceInfo {
const ResourceBinding &getBinding() const { return Binding; }
TargetExtType *getHandleTy() const { return HandleTy; }
- const StringRef getName() const { return Symbol ? Symbol->getName() : ""; }
+ StringRef getName() const { return Symbol ? Symbol->getName() : ""; }
bool hasSymbol() const { return Symbol; }
GlobalVariable *createSymbol(Module &M, StructType *Ty, StringRef Name = "");
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 28e5840fdde5b..036a859505d25 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -41,7 +41,7 @@ namespace ARM_AM {
inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
- inline const StringRef getShiftOpcStr(ShiftOpc Op) {
+ inline StringRef getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return "asr";
>From 411997ce21dca777100fc5105a12be1f13807e39 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 09:53:59 -0700
Subject: [PATCH 011/115] [ADT] Drop "const" from "const APInt" (NFC) (#138825)
---
llvm/include/llvm/ADT/APInt.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 02d58d8c3d31c..ba33c49fb5191 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2234,12 +2234,12 @@ inline const APInt &umax(const APInt &A, const APInt &B) {
}
/// Determine the absolute difference of two APInts considered to be signed.
-inline const APInt abds(const APInt &A, const APInt &B) {
+inline APInt abds(const APInt &A, const APInt &B) {
return A.sge(B) ? (A - B) : (B - A);
}
/// Determine the absolute difference of two APInts considered to be unsigned.
-inline const APInt abdu(const APInt &A, const APInt &B) {
+inline APInt abdu(const APInt &A, const APInt &B) {
return A.uge(B) ? (A - B) : (B - A);
}
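As a minimal usage sketch (not part of the patch above): the NFC change only drops the redundant top-level `const` from the return type, so existing call sites are unaffected. This assumes the helpers live in the usual `llvm::APIntOps` namespace, as in the surrounding header:

```cpp
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Absolute difference of two APInts, interpreting them as signed (abds)
// or unsigned (abdu).
APInt signedDiff(const APInt &A, const APInt &B) {
  return APIntOps::abds(A, B); // A sge B ? A - B : B - A
}
APInt unsignedDiff(const APInt &A, const APInt &B) {
  return APIntOps::abdu(A, B); // A uge B ? A - B : B - A
}
```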
>From c53e583007738f591d0c9993f06b65c4898a04f8 Mon Sep 17 00:00:00 2001
From: ShashwathiNavada <shashwathinavada at gmail.com>
Date: Wed, 7 May 2025 22:25:14 +0530
Subject: [PATCH 012/115] [Driver] Reject -mcmodel=tiny on X86 (#125643)
The mcmodel=tiny memory model is only valid on ARM targets. When it is
passed on X86, the compiler throws an internal error along with a stack
dump (#125641).
This patch resolves the issue by rejecting the option in the driver.
Reduced test case:
```
#include <stdio.h>
int main( void )
{
printf( "Hello, World!\n" );
return 0;
}
```
```
0. Program arguments: /opt/compiler-explorer/clang-trunk/bin/clang++ -gdwarf-4 -g -o /app/output.s -fno-verbose-asm -S --gcc-toolchain=/opt/compiler-explorer/gcc-snapshot -fcolor-diagnostics -fno-crash-diagnostics -mcmodel=tiny <source>
1. <eof> parser at end of file
#0 0x0000000003b10218 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3b10218)
#1 0x0000000003b0e35c llvm::sys::CleanupOnSignal(unsigned long) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3b0e35c)
#2 0x0000000003a5dbc3 llvm::CrashRecoveryContext::HandleExit(int) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3a5dbc3)
#3 0x0000000003b05cfe llvm::sys::Process::Exit(int, bool) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3b05cfe)
#4 0x0000000000d4e3eb LLVMErrorHandler(void*, char const*, bool) cc1_main.cpp:0:0
#5 0x0000000003a67c93 llvm::report_fatal_error(llvm::Twine const&, bool) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3a67c93)
#6 0x0000000003a67df8 (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3a67df8)
#7 0x0000000002549148 llvm::X86TargetMachine::X86TargetMachine(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOptLevel, bool) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x2549148)
#8 0x00000000025491fc llvm::RegisterTargetMachine<llvm::X86TargetMachine>::Allocator(llvm::Target const&, llvm::Triple const&, llvm::StringRef, llvm::StringRef, llvm::TargetOptions const&, std::optional<llvm::Reloc::Model>, std::optional<llvm::CodeModel::Model>, llvm::CodeGenOptLevel, bool) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x25491fc)
#9 0x0000000003db74cc clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3db74cc)
#10 0x0000000004460d95 clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x4460d95)
#11 0x00000000060005ec clang::ParseAST(clang::Sema&, bool, bool) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x60005ec)
#12 0x00000000044614b5 clang::CodeGenAction::ExecuteAction() (/opt/compiler-explorer/clang-trunk/bin/clang+++0x44614b5)
#13 0x0000000004737121 clang::FrontendAction::Execute() (/opt/compiler-explorer/clang-trunk/bin/clang+++0x4737121)
#14 0x00000000046b777b clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x46b777b)
#15 0x00000000048229e3 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x48229e3)
#16 0x0000000000d50621 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/opt/compiler-explorer/clang-trunk/bin/clang+++0xd50621)
#17 0x0000000000d48e2d ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&) driver.cpp:0:0
#18 0x00000000044acc99 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::'lambda'()>(long) Job.cpp:0:0
#19 0x0000000003a5dac3 llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x3a5dac3)
#20 0x00000000044aceb9 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (.part.0) Job.cpp:0:0
#21 0x00000000044710dd clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (/opt/compiler-explorer/clang-trunk/bin/clang+++0x44710dd)
#22 0x0000000004472071 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (/opt/compiler-explorer/clang-trunk/bin/clang+++0x4472071)
#23 0x000000000447c3fc clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (/opt/compiler-explorer/clang-trunk/bin/clang+++0x447c3fc)
#24 0x0000000000d4d2b1 clang_main(int, char**, llvm::ToolContext const&) (/opt/compiler-explorer/clang-trunk/bin/clang+++0xd4d2b1)
#25 0x0000000000c12464 main (/opt/compiler-explorer/clang-trunk/bin/clang+++0xc12464)
#26 0x00007ae43b029d90 (/lib/x86_64-linux-gnu/libc.so.6+0x29d90)
#27 0x00007ae43b029e40 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x29e40)
#28 0x0000000000d488c5 _start (/opt/compiler-explorer/clang-trunk/bin/clang+++0xd488c5)
```
---------
Co-authored-by: Shashwathi N <nshashwa at pe31.hpc.amslabs.hpecorp.net>
---
clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +--
clang/test/Driver/mcmodel.c | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 8fbffa071634c..5c1bc090810a2 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2938,8 +2938,7 @@ void tools::addMCModel(const Driver &D, const llvm::opt::ArgList &Args,
Ok = CM == "small" || CM == "medium" ||
(CM == "large" && Triple.isRISCV64());
} else if (Triple.getArch() == llvm::Triple::x86_64) {
- Ok = llvm::is_contained({"small", "kernel", "medium", "large", "tiny"},
- CM);
+ Ok = llvm::is_contained({"small", "kernel", "medium", "large"}, CM);
} else if (Triple.isNVPTX() || Triple.isAMDGPU() || Triple.isSPIRV()) {
// NVPTX/AMDGPU/SPIRV does not care about the code model and will accept
// whatever works for the host.
diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c
index c6c8b5433d23b..51c2effb56ad4 100644
--- a/clang/test/Driver/mcmodel.c
+++ b/clang/test/Driver/mcmodel.c
@@ -1,5 +1,5 @@
// RUN: not %clang -### -c --target=i686 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=ERR-MEDIUM %s
-// RUN: %clang --target=x86_64 -### -c -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=TINY %s
+// RUN: not %clang --target=x86_64 -### -c -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-TINY %s
// RUN: %clang --target=x86_64 -### -c -mcmodel=small %s 2>&1 | FileCheck --check-prefix=SMALL %s
// RUN: %clang --target=x86_64 -### -S -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=KERNEL %s
// RUN: %clang --target=x86_64 -### -c -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
>From e33ca9adc8ccb0bc4fa590975898b788ef880dd7 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Wed, 7 May 2025 12:59:38 -0400
Subject: [PATCH 013/115] [libc++] Reword std::advance assertion message for
consistency with ranges::advance (#138749)
As brought up in https://github.com/llvm/llvm-project/pull/133276.
---
libcxx/include/__iterator/advance.h | 2 +-
libcxx/test/libcxx/iterators/assert.advance.pass.cpp | 2 +-
libcxx/test/libcxx/iterators/assert.next.pass.cpp | 2 +-
libcxx/test/libcxx/iterators/assert.prev.pass.cpp | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h
index f1a8d28f39aa0..c7d3c1f0e8f05 100644
--- a/libcxx/include/__iterator/advance.h
+++ b/libcxx/include/__iterator/advance.h
@@ -66,7 +66,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 void advance(_InputIter& __i
typedef typename iterator_traits<_InputIter>::difference_type _Difference;
_Difference __n = static_cast<_Difference>(std::__convert_to_integral(__orig_n));
_LIBCPP_ASSERT_PEDANTIC(__has_bidirectional_iterator_category<_InputIter>::value || __n >= 0,
- "Attempt to advance(it, n) with negative n on a non-bidirectional iterator");
+ "std::advance: Can only pass a negative `n` with a bidirectional_iterator.");
std::__advance(__i, __n, typename iterator_traits<_InputIter>::iterator_category());
}
diff --git a/libcxx/test/libcxx/iterators/assert.advance.pass.cpp b/libcxx/test/libcxx/iterators/assert.advance.pass.cpp
index e9d2f27008260..a7e8878b933b2 100644
--- a/libcxx/test/libcxx/iterators/assert.advance.pass.cpp
+++ b/libcxx/test/libcxx/iterators/assert.advance.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**) {
forward_iterator<int *> it(a+1);
std::advance(it, 1); // should work fine
std::advance(it, 0); // should work fine
- TEST_LIBCPP_ASSERT_FAILURE(std::advance(it, -1), "Attempt to advance(it, n) with negative n on a non-bidirectional iterator");
+ TEST_LIBCPP_ASSERT_FAILURE(std::advance(it, -1), "std::advance: Can only pass a negative `n` with a bidirectional_iterator.");
return 0;
}
diff --git a/libcxx/test/libcxx/iterators/assert.next.pass.cpp b/libcxx/test/libcxx/iterators/assert.next.pass.cpp
index 1e86723085542..2e0296b72d124 100644
--- a/libcxx/test/libcxx/iterators/assert.next.pass.cpp
+++ b/libcxx/test/libcxx/iterators/assert.next.pass.cpp
@@ -25,7 +25,7 @@ int main(int, char**) {
forward_iterator<int *> it(a+1);
(void)std::next(it, 1); // should work fine
(void)std::next(it, 0); // should work fine
- TEST_LIBCPP_ASSERT_FAILURE(std::next(it, -1), "Attempt to advance(it, n) with negative n on a non-bidirectional iterator");
+ TEST_LIBCPP_ASSERT_FAILURE(std::next(it, -1), "std::advance: Can only pass a negative `n` with a bidirectional_iterator.");
return 0;
}
diff --git a/libcxx/test/libcxx/iterators/assert.prev.pass.cpp b/libcxx/test/libcxx/iterators/assert.prev.pass.cpp
index 29b8d6ed5d1e2..deac1edf59e06 100644
--- a/libcxx/test/libcxx/iterators/assert.prev.pass.cpp
+++ b/libcxx/test/libcxx/iterators/assert.prev.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**) {
forward_iterator<int *> it(a+1);
(void)std::prev(it, -1); // should work fine
(void)std::prev(it, 0); // should work fine
- TEST_LIBCPP_ASSERT_FAILURE(std::prev(it, 1), "Attempt to advance(it, n) with negative n on a non-bidirectional iterator");
+ TEST_LIBCPP_ASSERT_FAILURE(std::prev(it, 1), "std::advance: Can only pass a negative `n` with a bidirectional_iterator.");
return 0;
}
>From 91074a1b50fd497bef452eadef70b75a64dee3e4 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Wed, 7 May 2025 13:01:40 -0400
Subject: [PATCH 014/115] [libc++] Reword release note section about future
releases (#138544)
For several releases, we had a section in the release notes that was
called "Upcoming Deprecations and Removals". That section was used to
advertise breaking changes in future releases as opposed to ones in the
current release.
However, the way this section was worded and organized made it unclear
what release these announcements related to. This patch rewords that
section of the release notes to make it less ambiguous and moves items
that aren't done yet (but relate to the ongoing release) to a different
section with a TODO.
---
libcxx/docs/ReleaseNotes/21.rst | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst
index a7382c5222d08..c571dd6f08fe9 100644
--- a/libcxx/docs/ReleaseNotes/21.rst
+++ b/libcxx/docs/ReleaseNotes/21.rst
@@ -80,16 +80,18 @@ Deprecations and Removals
- The ``_LIBCPP_VERBOSE_ABORT_NOT_NOEXCEPT`` has been removed, making ``std::__libcpp_verbose_abort``
unconditionally ``noexcept``.
+- TODO: The non-conforming extension ``packaged_task::result_type`` has been removed in LLVM 21.
+
Potentially breaking changes
----------------------------
- The implementation of ``num_put::do_put`` has been replaced to improve the performance, which can lead to different
output when printing pointers.
-Upcoming Deprecations and Removals
-----------------------------------
+Announcements About Future Releases
+-----------------------------------
-LLVM 21
+LLVM 22
~~~~~~~
- The status of the C++03 implementation will be frozen after the LLVM 21 release. This means that starting in LLVM 22,
@@ -101,13 +103,6 @@ LLVM 21
If you are using C++03 in your project, you should consider moving to a newer version of the Standard to get the most
out of libc++.
-- Non-conforming extension ``packaged_task::result_type`` will be removed in LLVM 21.
-
-LLVM 22
-~~~~~~~
-
-- TODO
-
ABI Affecting Changes
---------------------
>From 84b1b6231cb8ab3ebd92ded7dd54d344b10dffe2 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <rampitec at users.noreply.github.com>
Date: Wed, 7 May 2025 10:04:59 -0700
Subject: [PATCH 015/115] [AMDGPU] Fix endline in gfx950_invalid_encoding.txt.
NFC. (#138813)
---
llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
index b0f3a8af8f3fb..1a9434274d1a7 100644
--- a/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
+++ b/llvm/test/MC/AMDGPU/gfx950_invalid_encoding.txt
@@ -10,4 +10,4 @@
0x00,0x00,0xbf,0xd3,0x02,0x09,0x0a,0x04
# GFX950: warning: invalid instruction encoding
-0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
\ No newline at end of file
+0x00,0x80,0xbf,0xd3,0x02,0x09,0x0a,0x04
>From 68ee36a144bb6f090b10586a9e1f049f1462132e Mon Sep 17 00:00:00 2001
From: Henrich Lauko <xlauko at mail.muni.cz>
Date: Wed, 7 May 2025 19:21:01 +0200
Subject: [PATCH 016/115] [CIR] Remove inferred context from pointer type
getters (#138858)
This mirrors incubator changes from https://github.com/llvm/clangir/pull/1600
---
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 2 +-
clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index d96a1394b97f2..a63bf4f8858d0 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -111,7 +111,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
cir::BoolType getBoolTy() { return cir::BoolType::get(getContext()); }
cir::PointerType getPointerTo(mlir::Type ty) {
- return cir::PointerType::get(getContext(), ty);
+ return cir::PointerType::get(ty);
}
cir::PointerType getVoidPtrTy() {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 64cbda2ebe0af..711a65215b043 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -42,7 +42,7 @@ Address CIRGenFunction::emitAddrOfFieldStorage(Address base,
mlir::Location loc = getLoc(field->getLocation());
mlir::Type fieldType = convertType(field->getType());
- auto fieldPtr = cir::PointerType::get(builder.getContext(), fieldType);
+ auto fieldPtr = cir::PointerType::get(fieldType);
// For most cases fieldName is the same as field->getName() but for lambdas,
// which do not currently carry the name, so it can be passed down from the
// CaptureStmt.
>From 5fd90987e147c64a735f0cb2bcfbef4e4cce5e21 Mon Sep 17 00:00:00 2001
From: Henrich Lauko <xlauko at mail.muni.cz>
Date: Wed, 7 May 2025 19:23:53 +0200
Subject: [PATCH 017/115] [CIR] Refactor VoidPtr constraint to CIR_VoidPtrType
(#138859)
This mirrors incubator changes from https://github.com/llvm/clangir/pull/1601
---
.../CIR/Dialect/IR/CIRTypeConstraints.td | 33 ++++++++++++++++
.../include/clang/CIR/Dialect/IR/CIRTypes.td | 38 +++++++++++--------
2 files changed, 56 insertions(+), 15 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
index 10e5d15ff9fa8..00f67e2a03a25 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
@@ -141,4 +141,37 @@ def CIR_AnyIntOrFloatType : AnyTypeOf<[CIR_AnyFloatType, CIR_AnyIntType],
let cppFunctionName = "isAnyIntegerOrFloatingPointType";
}
+//===----------------------------------------------------------------------===//
+// Pointer Type predicates
+//===----------------------------------------------------------------------===//
+
+def CIR_AnyPtrType : CIR_TypeBase<"::cir::PointerType", "pointer type">;
+
+// Pointer to type constraint bases
+class CIR_IsPtrToPred<code type> : CPred<"$_self.isPtrTo<" # type # ">()">;
+
+class CIR_PtrTo<code type, string summary>
+ : CIR_ConfinedType<CIR_AnyPtrType, [CIR_IsPtrToPred<type>],
+ "pointer to " # summary>;
+
+// Pointer to pointer constraint bases
+class CIR_IsPtrToPtrToPred<code type>
+ : CPred<"$_self.isPtrToPtrTo<" # type # ">()">;
+
+class CIR_PtrToPtrTo<code type, string summary>
+ : CIR_ConfinedType<CIR_AnyPtrType, [CIR_IsPtrToPtrToPred<type>],
+ "pointer to pointer to " # summary>;
+
+// Void pointer type constraints
+def CIR_VoidPtrType
+ : CIR_PtrTo<"::cir::VoidType", "void type">,
+ BuildableType<"$_builder.getType<" # cppType # ">("
+ "cir::VoidType::get($_builder.getContext()))">;
+
+def CIR_PtrToVoidPtrType
+ : CIR_PtrToPtrTo<"::cir::VoidType", "void type">,
+ BuildableType<"$_builder.getType<" # cppType # ">("
+ "$_builder.getType<" # cppType # ">("
+ "cir::VoidType::get($_builder.getContext())))">;
+
#endif // CLANG_CIR_DIALECT_IR_CIRTYPECONSTRAINTS_TD
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index 959e2cd822e76..26f1122a4b261 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -197,8 +197,30 @@ def CIR_PointerType : CIR_Type<"Pointer", "ptr",
let skipDefaultBuilders = 1;
let extraClassDeclaration = [{
+ template <typename ...Types>
+ bool isPtrTo() const {
+ return mlir::isa< Types... >(getPointee());
+ }
+
bool isVoidPtr() const {
- return mlir::isa<cir::VoidType>(getPointee());
+ return isPtrTo<cir::VoidType>();
+ }
+
+ template <typename ...Types>
+ bool isPtrToPtrTo() const {
+ if (auto ptrType = mlir::dyn_cast<cir::PointerType>(getPointee()))
+ return ptrType.isPtrTo<Types...>();
+ return false;
+ }
+
+ bool isPtrTo(mlir::Type type) const {
+ return getPointee() == type;
+ }
+
+ bool isPtrToPtrTo(mlir::Type type) const {
+ if (auto ptrType = mlir::dyn_cast<cir::PointerType>(getPointee()))
+ return ptrType.isPtrTo(type);
+ return false;
}
}];
}
@@ -368,20 +390,6 @@ def CIR_VoidType : CIR_Type<"Void", "void"> {
}];
}
-// Constraints
-
-// Pointer to void
-def VoidPtr : Type<
- And<[
- CPred<"::mlir::isa<::cir::PointerType>($_self)">,
- CPred<"::mlir::isa<::cir::VoidType>("
- "::mlir::cast<::cir::PointerType>($_self).getPointee())">,
- ]>, "void*">,
- BuildableType<
- "cir::PointerType::get($_builder.getContext(),"
- "cir::VoidType::get($_builder.getContext()))"> {
-}
-
//===----------------------------------------------------------------------===//
// RecordType
//
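A minimal sketch (not part of the patch above) of how the new pointee-query helpers on `cir::PointerType` might be used; the function name is hypothetical:

```cpp
// Returns true if ptrTy is a CIR void* or void**, using the templated
// isPtrTo / isPtrToPtrTo helpers added in the extraClassDeclaration above.
static bool isVoidPtrOrPtrToVoidPtr(cir::PointerType ptrTy) {
  return ptrTy.isPtrTo<cir::VoidType>() ||
         ptrTy.isPtrToPtrTo<cir::VoidType>();
}
```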
>From 9808e1f9820eb16d240bee2e101b6538fe8b8269 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 7 May 2025 19:24:34 +0200
Subject: [PATCH 018/115] clang: Remove unnecessary pointer bitcast (#138857)
---
clang/lib/CodeGen/CGPointerAuth.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CodeGen/CGPointerAuth.cpp b/clang/lib/CodeGen/CGPointerAuth.cpp
index 0a183a8524c17..474848c8324f3 100644
--- a/clang/lib/CodeGen/CGPointerAuth.cpp
+++ b/clang/lib/CodeGen/CGPointerAuth.cpp
@@ -724,7 +724,6 @@ Address Address::getResignedAddress(const CGPointerAuthInfo &NewInfo,
Val = CGF.emitPointerAuthResign(getBasePointer(), QualType(), CurInfo,
NewInfo, isKnownNonNull());
- Val = CGF.Builder.CreateBitCast(Val, getType());
return Address(Val, getElementType(), getAlignment(), NewInfo,
/*Offset=*/nullptr, isKnownNonNull());
}
>From e0a951fec4b01eba52e0aadc821b562201c5dfff Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 7 May 2025 10:07:33 -0700
Subject: [PATCH 019/115] [RISCV] Extend zvqdotq tests to cover use of
accumulator operand
---
.../RISCV/rvv/fixed-vectors-zvqdotq.ll | 183 ++++++++++++++++++
1 file changed, 183 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
index e48bc9cdfea4e..edc9886abc3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zvqdotq.ll
@@ -297,3 +297,186 @@ entry:
%res = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a.ext)
ret i32 %res
}
+
+define i32 @vqdot_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdot_vv_accum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vsext.vf2 v16, v9
+; CHECK-NEXT: vwmacc.vv v12, v10, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.sext = sext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+ %add = add <16 x i32> %mul, %x
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+ ret i32 %sum
+}
+
+define i32 @vqdotu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdotu_vv_accum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vwmulu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vwaddu.wv v12, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.zext = zext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+ %add = add <16 x i32> %mul, %x
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+ ret i32 %sum
+}
+
+define i32 @vqdotsu_vv_accum(<16 x i8> %a, <16 x i8> %b, <16 x i32> %x) {
+; CHECK-LABEL: vqdotsu_vv_accum:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v16, v9
+; CHECK-NEXT: vwmaccsu.vv v12, v10, v16
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v12, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+ %add = add <16 x i32> %mul, %x
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+ ret i32 %sum
+}
+
+define i32 @vqdot_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdot_vv_scalar_add:
+; NODOT: # %bb.0: # %entry
+; NODOT-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; NODOT-NEXT: vsext.vf2 v12, v8
+; NODOT-NEXT: vsext.vf2 v14, v9
+; NODOT-NEXT: vwmul.vv v8, v12, v14
+; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT: vmv.s.x v12, a0
+; NODOT-NEXT: vredsum.vs v8, v8, v12
+; NODOT-NEXT: vmv.x.s a0, v8
+; NODOT-NEXT: ret
+;
+; DOT-LABEL: vqdot_vv_scalar_add:
+; DOT: # %bb.0: # %entry
+; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT: vmv.v.i v10, 0
+; DOT-NEXT: vqdot.vv v10, v8, v9
+; DOT-NEXT: vmv.s.x v8, a0
+; DOT-NEXT: vredsum.vs v8, v10, v8
+; DOT-NEXT: vmv.x.s a0, v8
+; DOT-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.sext = sext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ %add = add i32 %sum, %x
+ ret i32 %add
+}
+
+define i32 @vqdotu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdotu_vv_scalar_add:
+; NODOT: # %bb.0: # %entry
+; NODOT-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; NODOT-NEXT: vwmulu.vv v10, v8, v9
+; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT: vmv.s.x v8, a0
+; NODOT-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; NODOT-NEXT: vwredsumu.vs v8, v10, v8
+; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT: vmv.x.s a0, v8
+; NODOT-NEXT: ret
+;
+; DOT-LABEL: vqdotu_vv_scalar_add:
+; DOT: # %bb.0: # %entry
+; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT: vmv.v.i v10, 0
+; DOT-NEXT: vqdotu.vv v10, v8, v9
+; DOT-NEXT: vmv.s.x v8, a0
+; DOT-NEXT: vredsum.vs v8, v10, v8
+; DOT-NEXT: vmv.x.s a0, v8
+; DOT-NEXT: ret
+entry:
+ %a.zext = zext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.zext, %b.zext
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ %add = add i32 %sum, %x
+ ret i32 %add
+}
+
+define i32 @vqdotsu_vv_scalar_add(<16 x i8> %a, <16 x i8> %b, i32 %x) {
+; NODOT-LABEL: vqdotsu_vv_scalar_add:
+; NODOT: # %bb.0: # %entry
+; NODOT-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; NODOT-NEXT: vsext.vf2 v12, v8
+; NODOT-NEXT: vzext.vf2 v14, v9
+; NODOT-NEXT: vwmulsu.vv v8, v12, v14
+; NODOT-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; NODOT-NEXT: vmv.s.x v12, a0
+; NODOT-NEXT: vredsum.vs v8, v8, v12
+; NODOT-NEXT: vmv.x.s a0, v8
+; NODOT-NEXT: ret
+;
+; DOT-LABEL: vqdotsu_vv_scalar_add:
+; DOT: # %bb.0: # %entry
+; DOT-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; DOT-NEXT: vmv.v.i v10, 0
+; DOT-NEXT: vqdotsu.vv v10, v8, v9
+; DOT-NEXT: vmv.s.x v8, a0
+; DOT-NEXT: vredsum.vs v8, v10, v8
+; DOT-NEXT: vmv.x.s a0, v8
+; DOT-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.zext = zext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.zext
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %mul)
+ %add = add i32 %sum, %x
+ ret i32 %add
+}
+
+define i32 @vqdot_vv_split(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: vqdot_vv_split:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vsext.vf2 v14, v9
+; CHECK-NEXT: vsext.vf2 v16, v10
+; CHECK-NEXT: vsext.vf2 v18, v11
+; CHECK-NEXT: vwmul.vv v8, v12, v14
+; CHECK-NEXT: vwmacc.vv v8, v16, v18
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmv.s.x v12, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v12
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+entry:
+ %a.sext = sext <16 x i8> %a to <16 x i32>
+ %b.sext = sext <16 x i8> %b to <16 x i32>
+ %mul = mul nuw nsw <16 x i32> %a.sext, %b.sext
+ %c.sext = sext <16 x i8> %c to <16 x i32>
+ %d.sext = sext <16 x i8> %d to <16 x i32>
+ %mul2 = mul nuw nsw <16 x i32> %c.sext, %d.sext
+ %add = add <16 x i32> %mul, %mul2
+ %sum = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %add)
+ ret i32 %sum
+}
>From 3212555c6c3f42361e371b18a69ebed372ee6da8 Mon Sep 17 00:00:00 2001
From: Steven Perron <stevenperron at google.com>
Date: Wed, 7 May 2025 13:27:28 -0400
Subject: [PATCH 020/115] [SPIRV] Reapply explicit layout PRs (#138867)
The ASan failure was fixed by #138695, but another failure was
introduced in the meantime. The cause of that failure has now been
fixed, so I will reapply the two PRs.
Reapply "[SPIRV] Add explicit layout (#135789)"
This reverts commit 0fb5720b4bf461d4d51ee85a8a6f4ea4f6fb4966.
Reapply "[SPIRV] Fix asan failure (#138695)"
This reverts commit df90ab96fb5a10df88fcfe6b0e8e63781ca24eca.
---
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 293 +++++++++++-------
llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 40 ++-
llvm/lib/Target/SPIRV/SPIRVIRMapping.h | 55 +++-
llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 94 ++++++
llvm/lib/Target/SPIRV/SPIRVISelLowering.h | 5 +
.../SPIRV/hlsl-resources/StructuredBuffer.ll | 11 +-
.../CodeGen/SPIRV/spirv-explicit-layout.ll | 149 +++++++++
7 files changed, 523 insertions(+), 124 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/spirv-explicit-layout.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 88b1e44d15af0..ad42c73e24333 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -55,7 +55,6 @@ static unsigned typeToAddressSpace(const Type *Ty) {
reportFatalInternalError("Unable to convert LLVM type to SPIRVType");
}
-#ifndef NDEBUG
static bool
storageClassRequiresExplictLayout(SPIRV::StorageClass::StorageClass SC) {
switch (SC) {
@@ -87,7 +86,6 @@ storageClassRequiresExplictLayout(SPIRV::StorageClass::StorageClass SC) {
}
llvm_unreachable("Unknown SPIRV::StorageClass enum");
}
-#endif
SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize)
: PointerSize(PointerSize), Bound(0) {}
@@ -837,13 +835,31 @@ static std::string buildSpirvTypeName(const SPIRVType *Type,
}
case SPIRV::OpTypeStruct: {
std::string TypeName = "{";
- for (uint32_t I = 2; I < Type->getNumOperands(); ++I) {
+ for (uint32_t I = 1; I < Type->getNumOperands(); ++I) {
SPIRVType *MemberType =
GR.getSPIRVTypeForVReg(Type->getOperand(I).getReg());
- TypeName = '_' + buildSpirvTypeName(MemberType, MIRBuilder, GR);
+ TypeName += '_' + buildSpirvTypeName(MemberType, MIRBuilder, GR);
}
return TypeName + "}";
}
+ case SPIRV::OpTypeVector: {
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ Register ElementTypeReg = Type->getOperand(1).getReg();
+ auto *ElementType = MRI->getUniqueVRegDef(ElementTypeReg);
+ uint32_t VectorSize = GR.getScalarOrVectorComponentCount(Type);
+ return (buildSpirvTypeName(ElementType, MIRBuilder, GR) + Twine("[") +
+ Twine(VectorSize) + Twine("]"))
+ .str();
+ }
+ case SPIRV::OpTypeRuntimeArray: {
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ Register ElementTypeReg = Type->getOperand(1).getReg();
+ auto *ElementType = MRI->getUniqueVRegDef(ElementTypeReg);
+ uint32_t ArraySize = 0;
+ return (buildSpirvTypeName(ElementType, MIRBuilder, GR) + Twine("[") +
+ Twine(ArraySize) + Twine("]"))
+ .str();
+ }
default:
llvm_unreachable("Trying to the the name of an unknown type.");
}
@@ -885,30 +901,41 @@ Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding(
return VarReg;
}
+// TODO: Double check the calls to getOpTypeArray to make sure that `ElemType`
+// is explicitly laid out when required.
SPIRVType *SPIRVGlobalRegistry::getOpTypeArray(uint32_t NumElems,
SPIRVType *ElemType,
MachineIRBuilder &MIRBuilder,
+ bool ExplicitLayoutRequired,
bool EmitIR) {
assert((ElemType->getOpcode() != SPIRV::OpTypeVoid) &&
"Invalid array element type");
SPIRVType *SpvTypeInt32 = getOrCreateSPIRVIntegerType(32, MIRBuilder);
-
+ SPIRVType *ArrayType = nullptr;
if (NumElems != 0) {
Register NumElementsVReg =
buildConstantInt(NumElems, MIRBuilder, SpvTypeInt32, EmitIR);
- return createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
+ ArrayType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
return MIRBuilder.buildInstr(SPIRV::OpTypeArray)
.addDef(createTypeVReg(MIRBuilder))
.addUse(getSPIRVTypeID(ElemType))
.addUse(NumElementsVReg);
});
+ } else {
+ ArrayType = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
+ return MIRBuilder.buildInstr(SPIRV::OpTypeRuntimeArray)
+ .addDef(createTypeVReg(MIRBuilder))
+ .addUse(getSPIRVTypeID(ElemType));
+ });
}
- return createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
- return MIRBuilder.buildInstr(SPIRV::OpTypeRuntimeArray)
- .addDef(createTypeVReg(MIRBuilder))
- .addUse(getSPIRVTypeID(ElemType));
- });
+ if (ExplicitLayoutRequired && !isResourceType(ElemType)) {
+ Type *ET = const_cast<Type *>(getTypeForSPIRVType(ElemType));
+ addArrayStrideDecorations(ArrayType->defs().begin()->getReg(), ET,
+ MIRBuilder);
+ }
+
+ return ArrayType;
}
SPIRVType *SPIRVGlobalRegistry::getOpTypeOpaque(const StructType *Ty,
@@ -926,7 +953,8 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeOpaque(const StructType *Ty,
SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(
const StructType *Ty, MachineIRBuilder &MIRBuilder,
- SPIRV::AccessQualifier::AccessQualifier AccQual, bool EmitIR) {
+ SPIRV::AccessQualifier::AccessQualifier AccQual,
+ bool ExplicitLayoutRequired, bool EmitIR) {
SmallVector<Register, 4> FieldTypes;
constexpr unsigned MaxWordCount = UINT16_MAX;
const size_t NumElements = Ty->getNumElements();
@@ -940,8 +968,8 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(
}
for (const auto &Elem : Ty->elements()) {
- SPIRVType *ElemTy =
- findSPIRVType(toTypedPointer(Elem), MIRBuilder, AccQual, EmitIR);
+ SPIRVType *ElemTy = findSPIRVType(toTypedPointer(Elem), MIRBuilder, AccQual,
+ ExplicitLayoutRequired, EmitIR);
assert(ElemTy && ElemTy->getOpcode() != SPIRV::OpTypeVoid &&
"Invalid struct element type");
FieldTypes.push_back(getSPIRVTypeID(ElemTy));
@@ -952,18 +980,27 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeStruct(
if (Ty->isPacked())
buildOpDecorate(ResVReg, MIRBuilder, SPIRV::Decoration::CPacked, {});
- return createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
- auto MIBStruct = MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg);
- for (size_t I = 0; I < SPIRVStructNumElements; ++I)
- MIBStruct.addUse(FieldTypes[I]);
- for (size_t I = SPIRVStructNumElements; I < NumElements;
- I += MaxNumElements) {
- auto MIBCont = MIRBuilder.buildInstr(SPIRV::OpTypeStructContinuedINTEL);
- for (size_t J = I; J < std::min(I + MaxNumElements, NumElements); ++J)
- MIBCont.addUse(FieldTypes[I]);
- }
- return MIBStruct;
- });
+ SPIRVType *SPVType =
+ createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
+ auto MIBStruct =
+ MIRBuilder.buildInstr(SPIRV::OpTypeStruct).addDef(ResVReg);
+ for (size_t I = 0; I < SPIRVStructNumElements; ++I)
+ MIBStruct.addUse(FieldTypes[I]);
+ for (size_t I = SPIRVStructNumElements; I < NumElements;
+ I += MaxNumElements) {
+ auto MIBCont =
+ MIRBuilder.buildInstr(SPIRV::OpTypeStructContinuedINTEL);
+ for (size_t J = I; J < std::min(I + MaxNumElements, NumElements); ++J)
+ MIBCont.addUse(FieldTypes[I]);
+ }
+ return MIBStruct;
+ });
+
+ if (ExplicitLayoutRequired)
+ addStructOffsetDecorations(SPVType->defs().begin()->getReg(),
+ const_cast<StructType *>(Ty), MIRBuilder);
+
+ return SPVType;
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSpecialType(
@@ -1013,22 +1050,26 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeFunctionWithArgs(
const Type *Ty, SPIRVType *RetType,
const SmallVectorImpl<SPIRVType *> &ArgTypes,
MachineIRBuilder &MIRBuilder) {
- if (const MachineInstr *MI = findMI(Ty, &MIRBuilder.getMF()))
+ if (const MachineInstr *MI = findMI(Ty, false, &MIRBuilder.getMF()))
return MI;
const MachineInstr *NewMI = getOpTypeFunction(RetType, ArgTypes, MIRBuilder);
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return finishCreatingSPIRVType(Ty, NewMI);
}
SPIRVType *SPIRVGlobalRegistry::findSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
- SPIRV::AccessQualifier::AccessQualifier AccQual, bool EmitIR) {
+ SPIRV::AccessQualifier::AccessQualifier AccQual,
+ bool ExplicitLayoutRequired, bool EmitIR) {
Ty = adjustIntTypeByWidth(Ty);
- if (const MachineInstr *MI = findMI(Ty, &MIRBuilder.getMF()))
+ // TODO: findMI needs to know if a layout is required.
+ if (const MachineInstr *MI =
+ findMI(Ty, ExplicitLayoutRequired, &MIRBuilder.getMF()))
return MI;
if (auto It = ForwardPointerTypes.find(Ty); It != ForwardPointerTypes.end())
return It->second;
- return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, EmitIR);
+ return restOfCreateSPIRVType(Ty, MIRBuilder, AccQual, ExplicitLayoutRequired,
+ EmitIR);
}
Register SPIRVGlobalRegistry::getSPIRVTypeID(const SPIRVType *SpirvType) const {
@@ -1062,11 +1103,13 @@ const Type *SPIRVGlobalRegistry::adjustIntTypeByWidth(const Type *Ty) const {
SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
- SPIRV::AccessQualifier::AccessQualifier AccQual, bool EmitIR) {
+ SPIRV::AccessQualifier::AccessQualifier AccQual,
+ bool ExplicitLayoutRequired, bool EmitIR) {
if (isSpecialOpaqueType(Ty))
return getOrCreateSpecialType(Ty, MIRBuilder, AccQual);
- if (const MachineInstr *MI = findMI(Ty, &MIRBuilder.getMF()))
+ if (const MachineInstr *MI =
+ findMI(Ty, ExplicitLayoutRequired, &MIRBuilder.getMF()))
return MI;
if (auto IType = dyn_cast<IntegerType>(Ty)) {
@@ -1079,27 +1122,31 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
if (Ty->isVoidTy())
return getOpTypeVoid(MIRBuilder);
if (Ty->isVectorTy()) {
- SPIRVType *El = findSPIRVType(cast<FixedVectorType>(Ty)->getElementType(),
- MIRBuilder, AccQual, EmitIR);
+ SPIRVType *El =
+ findSPIRVType(cast<FixedVectorType>(Ty)->getElementType(), MIRBuilder,
+ AccQual, ExplicitLayoutRequired, EmitIR);
return getOpTypeVector(cast<FixedVectorType>(Ty)->getNumElements(), El,
MIRBuilder);
}
if (Ty->isArrayTy()) {
- SPIRVType *El =
- findSPIRVType(Ty->getArrayElementType(), MIRBuilder, AccQual, EmitIR);
- return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder, EmitIR);
+ SPIRVType *El = findSPIRVType(Ty->getArrayElementType(), MIRBuilder,
+ AccQual, ExplicitLayoutRequired, EmitIR);
+ return getOpTypeArray(Ty->getArrayNumElements(), El, MIRBuilder,
+ ExplicitLayoutRequired, EmitIR);
}
if (auto SType = dyn_cast<StructType>(Ty)) {
if (SType->isOpaque())
return getOpTypeOpaque(SType, MIRBuilder);
- return getOpTypeStruct(SType, MIRBuilder, AccQual, EmitIR);
+ return getOpTypeStruct(SType, MIRBuilder, AccQual, ExplicitLayoutRequired,
+ EmitIR);
}
if (auto FType = dyn_cast<FunctionType>(Ty)) {
- SPIRVType *RetTy =
- findSPIRVType(FType->getReturnType(), MIRBuilder, AccQual, EmitIR);
+ SPIRVType *RetTy = findSPIRVType(FType->getReturnType(), MIRBuilder,
+ AccQual, ExplicitLayoutRequired, EmitIR);
SmallVector<SPIRVType *, 4> ParamTypes;
for (const auto &ParamTy : FType->params())
- ParamTypes.push_back(findSPIRVType(ParamTy, MIRBuilder, AccQual, EmitIR));
+ ParamTypes.push_back(findSPIRVType(ParamTy, MIRBuilder, AccQual,
+ ExplicitLayoutRequired, EmitIR));
return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder);
}
@@ -1114,44 +1161,50 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType(
const SPIRVSubtarget *ST =
static_cast<const SPIRVSubtarget *>(&MIRBuilder.getMF().getSubtarget());
auto SC = addressSpaceToStorageClass(AddrSpace, *ST);
- // Null pointer means we have a loop in type definitions, make and
- // return corresponding OpTypeForwardPointer.
- if (SpvElementType == nullptr) {
- auto [It, Inserted] = ForwardPointerTypes.try_emplace(Ty);
- if (Inserted)
- It->second = getOpTypeForwardPointer(SC, MIRBuilder);
- return It->second;
+
+ Type *ElemTy = ::getPointeeType(Ty);
+ if (!ElemTy) {
+ ElemTy = Type::getInt8Ty(MIRBuilder.getContext());
}
+
// If we have forward pointer associated with this type, use its register
// operand to create OpTypePointer.
if (auto It = ForwardPointerTypes.find(Ty); It != ForwardPointerTypes.end()) {
Register Reg = getSPIRVTypeID(It->second);
+ // TODO: what does getOpTypePointer do?
return getOpTypePointer(SC, SpvElementType, MIRBuilder, Reg);
}
- return getOrCreateSPIRVPointerType(SpvElementType, MIRBuilder, SC);
+ return getOrCreateSPIRVPointerType(ElemTy, MIRBuilder, SC);
}
SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
- SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
+ SPIRV::AccessQualifier::AccessQualifier AccessQual,
+ bool ExplicitLayoutRequired, bool EmitIR) {
+ // TODO: Could this create a problem if one requires an explicit layout, and
+ // the next time it does not?
if (TypesInProcessing.count(Ty) && !isPointerTyOrWrapper(Ty))
return nullptr;
TypesInProcessing.insert(Ty);
- SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR);
+ SPIRVType *SpirvType = createSPIRVType(Ty, MIRBuilder, AccessQual,
+ ExplicitLayoutRequired, EmitIR);
TypesInProcessing.erase(Ty);
VRegToTypeMap[&MIRBuilder.getMF()][getSPIRVTypeID(SpirvType)] = SpirvType;
+
+ // TODO: We could end up with two SPIR-V types pointing to the same llvm type.
+ // Is that a problem?
SPIRVToLLVMType[SpirvType] = unifyPtrType(Ty);
if (SpirvType->getOpcode() == SPIRV::OpTypeForwardPointer ||
- findMI(Ty, &MIRBuilder.getMF()) || isSpecialOpaqueType(Ty))
+ findMI(Ty, false, &MIRBuilder.getMF()) || isSpecialOpaqueType(Ty))
return SpirvType;
if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
ExtTy && isTypedPointerWrapper(ExtTy))
add(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0), SpirvType);
else if (!isPointerTy(Ty))
- add(Ty, SpirvType);
+ add(Ty, ExplicitLayoutRequired, SpirvType);
else if (isTypedPointerTy(Ty))
add(cast<TypedPointerType>(Ty)->getElementType(),
getPointerAddressSpace(Ty), SpirvType);
@@ -1183,14 +1236,15 @@ SPIRVType *SPIRVGlobalRegistry::getResultType(Register VReg,
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
const Type *Ty, MachineIRBuilder &MIRBuilder,
- SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) {
+ SPIRV::AccessQualifier::AccessQualifier AccessQual,
+ bool ExplicitLayoutRequired, bool EmitIR) {
const MachineFunction *MF = &MIRBuilder.getMF();
Register Reg;
if (auto *ExtTy = dyn_cast<TargetExtType>(Ty);
ExtTy && isTypedPointerWrapper(ExtTy))
Reg = find(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0), MF);
else if (!isPointerTy(Ty))
- Reg = find(Ty = adjustIntTypeByWidth(Ty), MF);
+ Reg = find(Ty = adjustIntTypeByWidth(Ty), ExplicitLayoutRequired, MF);
else if (isTypedPointerTy(Ty))
Reg = find(cast<TypedPointerType>(Ty)->getElementType(),
getPointerAddressSpace(Ty), MF);
@@ -1201,15 +1255,20 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(
return getSPIRVTypeForVReg(Reg);
TypesInProcessing.clear();
- SPIRVType *STy = restOfCreateSPIRVType(Ty, MIRBuilder, AccessQual, EmitIR);
+ SPIRVType *STy = restOfCreateSPIRVType(Ty, MIRBuilder, AccessQual,
+ ExplicitLayoutRequired, EmitIR);
// Create normal pointer types for the corresponding OpTypeForwardPointers.
for (auto &CU : ForwardPointerTypes) {
+ // Pointer type themselves do not require an explicit layout. The types
+ // they pointer to might, but that is taken care of when creating the type.
+ bool PtrNeedsLayout = false;
const Type *Ty2 = CU.first;
SPIRVType *STy2 = CU.second;
- if ((Reg = find(Ty2, MF)).isValid())
+ if ((Reg = find(Ty2, PtrNeedsLayout, MF)).isValid())
STy2 = getSPIRVTypeForVReg(Reg);
else
- STy2 = restOfCreateSPIRVType(Ty2, MIRBuilder, AccessQual, EmitIR);
+ STy2 = restOfCreateSPIRVType(Ty2, MIRBuilder, AccessQual, PtrNeedsLayout,
+ EmitIR);
if (Ty == Ty2)
STy = STy2;
}
@@ -1238,6 +1297,19 @@ bool SPIRVGlobalRegistry::isScalarOrVectorOfType(Register VReg,
return false;
}
+bool SPIRVGlobalRegistry::isResourceType(SPIRVType *Type) const {
+ switch (Type->getOpcode()) {
+ case SPIRV::OpTypeImage:
+ case SPIRV::OpTypeSampler:
+ case SPIRV::OpTypeSampledImage:
+ return true;
+ case SPIRV::OpTypeStruct:
+ return hasBlockDecoration(Type);
+ default:
+ return false;
+ }
+ return false;
+}
unsigned
SPIRVGlobalRegistry::getScalarOrVectorComponentCount(Register VReg) const {
return getScalarOrVectorComponentCount(getSPIRVTypeForVReg(VReg));
@@ -1362,16 +1434,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateVulkanBufferType(
if (const MachineInstr *MI = findMI(Key, &MIRBuilder.getMF()))
return MI;
- // TODO(134119): The SPIRVType for `ElemType` will not have an explicit
- // layout. This generates invalid SPIR-V.
+ bool ExplicitLayoutRequired = storageClassRequiresExplictLayout(SC);
+ // We need to get the SPIR-V type for the element here, so we can add the
+ // decoration to it.
auto *T = StructType::create(ElemType);
auto *BlockType =
- getOrCreateSPIRVType(T, MIRBuilder, SPIRV::AccessQualifier::None, EmitIr);
+ getOrCreateSPIRVType(T, MIRBuilder, SPIRV::AccessQualifier::None,
+ ExplicitLayoutRequired, EmitIr);
buildOpDecorate(BlockType->defs().begin()->getReg(), MIRBuilder,
SPIRV::Decoration::Block, {});
- buildOpMemberDecorate(BlockType->defs().begin()->getReg(), MIRBuilder,
- SPIRV::Decoration::Offset, 0, {0});
if (!IsWritable) {
buildOpMemberDecorate(BlockType->defs().begin()->getReg(), MIRBuilder,
@@ -1480,7 +1552,8 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeCoopMatr(
MachineIRBuilder &MIRBuilder, const TargetExtType *ExtensionType,
const SPIRVType *ElemType, uint32_t Scope, uint32_t Rows, uint32_t Columns,
uint32_t Use, bool EmitIR) {
- if (const MachineInstr *MI = findMI(ExtensionType, &MIRBuilder.getMF()))
+ if (const MachineInstr *MI =
+ findMI(ExtensionType, false, &MIRBuilder.getMF()))
return MI;
const MachineInstr *NewMI =
createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
@@ -1493,26 +1566,26 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeCoopMatr(
.addUse(buildConstantInt(Columns, MIRBuilder, SpvTypeInt32, EmitIR))
.addUse(buildConstantInt(Use, MIRBuilder, SpvTypeInt32, EmitIR));
});
- add(ExtensionType, NewMI);
+ add(ExtensionType, false, NewMI);
return NewMI;
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateOpTypeByOpcode(
const Type *Ty, MachineIRBuilder &MIRBuilder, unsigned Opcode) {
- if (const MachineInstr *MI = findMI(Ty, &MIRBuilder.getMF()))
+ if (const MachineInstr *MI = findMI(Ty, false, &MIRBuilder.getMF()))
return MI;
const MachineInstr *NewMI =
createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
return MIRBuilder.buildInstr(Opcode).addDef(createTypeVReg(MIRBuilder));
});
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return NewMI;
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateUnknownType(
const Type *Ty, MachineIRBuilder &MIRBuilder, unsigned Opcode,
const ArrayRef<MCOperand> Operands) {
- if (const MachineInstr *MI = findMI(Ty, &MIRBuilder.getMF()))
+ if (const MachineInstr *MI = findMI(Ty, false, &MIRBuilder.getMF()))
return MI;
Register ResVReg = createTypeVReg(MIRBuilder);
const MachineInstr *NewMI =
@@ -1529,7 +1602,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateUnknownType(
}
return MIB;
});
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return NewMI;
}
@@ -1545,7 +1618,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
if (hasBuiltinTypePrefix(TypeStr))
return getOrCreateSPIRVType(SPIRV::parseBuiltinTypeNameToTargetExtType(
TypeStr.str(), MIRBuilder.getContext()),
- MIRBuilder, AQ, true);
+ MIRBuilder, AQ, false, true);
// Parse type name in either "typeN" or "type vector[N]" format, where
// N is the number of elements of the vector.
@@ -1556,7 +1629,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
// Unable to recognize SPIRV type name
return nullptr;
- auto SpirvTy = getOrCreateSPIRVType(Ty, MIRBuilder, AQ, true);
+ auto SpirvTy = getOrCreateSPIRVType(Ty, MIRBuilder, AQ, false, true);
// Handle "type*" or "type* vector[N]".
if (TypeStr.starts_with("*")) {
@@ -1585,7 +1658,7 @@ SPIRVGlobalRegistry::getOrCreateSPIRVIntegerType(unsigned BitWidth,
MachineIRBuilder &MIRBuilder) {
return getOrCreateSPIRVType(
IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), BitWidth),
- MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
+ MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false, true);
}
SPIRVType *SPIRVGlobalRegistry::finishCreatingSPIRVType(const Type *LLVMTy,
@@ -1601,7 +1674,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
const SPIRVInstrInfo &TII,
unsigned SPIRVOPcode,
Type *Ty) {
- if (const MachineInstr *MI = findMI(Ty, CurMF))
+ if (const MachineInstr *MI = findMI(Ty, false, CurMF))
return MI;
MachineBasicBlock &DepMBB = I.getMF()->front();
MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI());
@@ -1613,7 +1686,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth,
.addImm(BitWidth)
.addImm(0);
});
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return finishCreatingSPIRVType(Ty, NewMI);
}
@@ -1654,14 +1727,14 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineIRBuilder &MIRBuilder,
bool EmitIR) {
return getOrCreateSPIRVType(
IntegerType::get(MIRBuilder.getMF().getFunction().getContext(), 1),
- MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR);
+ MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false, EmitIR);
}
SPIRVType *
SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I,
const SPIRVInstrInfo &TII) {
Type *Ty = IntegerType::get(CurMF->getFunction().getContext(), 1);
- if (const MachineInstr *MI = findMI(Ty, CurMF))
+ if (const MachineInstr *MI = findMI(Ty, false, CurMF))
return MI;
MachineBasicBlock &DepMBB = I.getMF()->front();
MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI());
@@ -1671,7 +1744,7 @@ SPIRVGlobalRegistry::getOrCreateSPIRVBoolType(MachineInstr &I,
MIRBuilder.getDL(), TII.get(SPIRV::OpTypeBool))
.addDef(createTypeVReg(CurMF->getRegInfo()));
});
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return finishCreatingSPIRVType(Ty, NewMI);
}
@@ -1681,7 +1754,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
return getOrCreateSPIRVType(
FixedVectorType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
NumElements),
- MIRBuilder, SPIRV::AccessQualifier::ReadWrite, EmitIR);
+ MIRBuilder, SPIRV::AccessQualifier::ReadWrite, false, EmitIR);
}
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
@@ -1689,7 +1762,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
const SPIRVInstrInfo &TII) {
Type *Ty = FixedVectorType::get(
const_cast<Type *>(getTypeForSPIRVType(BaseType)), NumElements);
- if (const MachineInstr *MI = findMI(Ty, CurMF))
+ if (const MachineInstr *MI = findMI(Ty, false, CurMF))
return MI;
MachineInstr *DepMI = const_cast<MachineInstr *>(BaseType);
MachineIRBuilder MIRBuilder(*DepMI->getParent(), DepMI->getIterator());
@@ -1701,30 +1774,7 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVVectorType(
.addUse(getSPIRVTypeID(BaseType))
.addImm(NumElements);
});
- add(Ty, NewMI);
- return finishCreatingSPIRVType(Ty, NewMI);
-}
-
-SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVArrayType(
- SPIRVType *BaseType, unsigned NumElements, MachineInstr &I,
- const SPIRVInstrInfo &TII) {
- Type *Ty = ArrayType::get(const_cast<Type *>(getTypeForSPIRVType(BaseType)),
- NumElements);
- if (const MachineInstr *MI = findMI(Ty, CurMF))
- return MI;
- SPIRVType *SpvTypeInt32 = getOrCreateSPIRVIntegerType(32, I, TII);
- Register Len = getOrCreateConstInt(NumElements, I, SpvTypeInt32, TII);
- MachineBasicBlock &DepMBB = I.getMF()->front();
- MachineIRBuilder MIRBuilder(DepMBB, getInsertPtValidEnd(&DepMBB));
- const MachineInstr *NewMI =
- createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) {
- return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(),
- MIRBuilder.getDL(), TII.get(SPIRV::OpTypeArray))
- .addDef(createTypeVReg(CurMF->getRegInfo()))
- .addUse(getSPIRVTypeID(BaseType))
- .addUse(Len);
- });
- add(Ty, NewMI);
+ add(Ty, false, NewMI);
return finishCreatingSPIRVType(Ty, NewMI);
}
@@ -1738,8 +1788,11 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVPointerType(
const Type *BaseType, MachineIRBuilder &MIRBuilder,
SPIRV::StorageClass::StorageClass SC) {
+ // TODO: Need to check if EmitIr should always be true.
SPIRVType *SpirvBaseType = getOrCreateSPIRVType(
- BaseType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite, true);
+ BaseType, MIRBuilder, SPIRV::AccessQualifier::ReadWrite,
+ storageClassRequiresExplictLayout(SC), true);
+ assert(SpirvBaseType);
return getOrCreateSPIRVPointerTypeInternal(SpirvBaseType, MIRBuilder, SC);
}
@@ -2006,3 +2059,33 @@ void SPIRVGlobalRegistry::updateAssignType(CallInst *AssignCI, Value *Arg,
addDeducedElementType(AssignCI, ElemTy);
addDeducedElementType(Arg, ElemTy);
}
+
+void SPIRVGlobalRegistry::addStructOffsetDecorations(
+ Register Reg, StructType *Ty, MachineIRBuilder &MIRBuilder) {
+ DataLayout DL;
+ ArrayRef<TypeSize> Offsets = DL.getStructLayout(Ty)->getMemberOffsets();
+ for (uint32_t I = 0; I < Ty->getNumElements(); ++I) {
+ buildOpMemberDecorate(Reg, MIRBuilder, SPIRV::Decoration::Offset, I,
+ {static_cast<uint32_t>(Offsets[I])});
+ }
+}
+
+void SPIRVGlobalRegistry::addArrayStrideDecorations(
+ Register Reg, Type *ElementType, MachineIRBuilder &MIRBuilder) {
+ uint32_t SizeInBytes = DataLayout().getTypeSizeInBits(ElementType) / 8;
+ buildOpDecorate(Reg, MIRBuilder, SPIRV::Decoration::ArrayStride,
+ {SizeInBytes});
+}
+
+bool SPIRVGlobalRegistry::hasBlockDecoration(SPIRVType *Type) const {
+ Register Def = getSPIRVTypeID(Type);
+ for (const MachineInstr &Use :
+ Type->getMF()->getRegInfo().use_instructions(Def)) {
+ if (Use.getOpcode() != SPIRV::OpDecorate)
+ continue;
+
+ if (Use.getOperand(1).getImm() == SPIRV::Decoration::Block)
+ return true;
+ }
+ return false;
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
index b05896fb7174c..7338e805956d6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h
@@ -90,14 +90,14 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
// Add a new OpTypeXXX instruction without checking for duplicates.
SPIRVType *createSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AQ,
- bool EmitIR);
+ bool ExplicitLayoutRequired, bool EmitIR);
SPIRVType *findSPIRVType(const Type *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier accessQual,
- bool EmitIR);
+ bool ExplicitLayoutRequired, bool EmitIR);
SPIRVType *
restOfCreateSPIRVType(const Type *Type, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccessQual,
- bool EmitIR);
+ bool ExplicitLayoutRequired, bool EmitIR);
 // Internal function creating an OpType at the correct position in the
// function by tweaking the passed "MIRBuilder" insertion point and restoring
@@ -298,10 +298,19 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
// EmitIR controls if we emit GMIR or SPV constants (e.g. for array sizes)
// because this method may be called from InstructionSelector and we don't
// want to emit extra IR instructions there.
+ SPIRVType *getOrCreateSPIRVType(const Type *Type, MachineInstr &I,
+ SPIRV::AccessQualifier::AccessQualifier AQ,
+ bool EmitIR) {
+ MachineIRBuilder MIRBuilder(I);
+ return getOrCreateSPIRVType(Type, MIRBuilder, AQ, EmitIR);
+ }
+
SPIRVType *getOrCreateSPIRVType(const Type *Type,
MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AQ,
- bool EmitIR);
+ bool EmitIR) {
+ return getOrCreateSPIRVType(Type, MIRBuilder, AQ, false, EmitIR);
+ }
const Type *getTypeForSPIRVType(const SPIRVType *Ty) const {
auto Res = SPIRVToLLVMType.find(Ty);
@@ -364,6 +373,10 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
// opcode (e.g. OpTypeBool, or OpTypeVector %x 4, where %x is OpTypeBool).
bool isScalarOrVectorOfType(Register VReg, unsigned TypeOpcode) const;
+ // Returns true if `Type` is a resource type. This could be an image type
+ // or a struct for a buffer decorated with the block decoration.
+ bool isResourceType(SPIRVType *Type) const;
+
// Return number of elements in a vector if the argument is associated with
// a vector type. Return 1 for a scalar type, and 0 for a missing type.
unsigned getScalarOrVectorComponentCount(Register VReg) const;
@@ -414,6 +427,11 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
const Type *adjustIntTypeByWidth(const Type *Ty) const;
unsigned adjustOpTypeIntWidth(unsigned Width) const;
+ SPIRVType *getOrCreateSPIRVType(const Type *Type,
+ MachineIRBuilder &MIRBuilder,
+ SPIRV::AccessQualifier::AccessQualifier AQ,
+ bool ExplicitLayoutRequired, bool EmitIR);
+
SPIRVType *getOpTypeInt(unsigned Width, MachineIRBuilder &MIRBuilder,
bool IsSigned = false);
@@ -425,14 +443,15 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
MachineIRBuilder &MIRBuilder);
SPIRVType *getOpTypeArray(uint32_t NumElems, SPIRVType *ElemType,
- MachineIRBuilder &MIRBuilder, bool EmitIR);
+ MachineIRBuilder &MIRBuilder,
+ bool ExplicitLayoutRequired, bool EmitIR);
SPIRVType *getOpTypeOpaque(const StructType *Ty,
MachineIRBuilder &MIRBuilder);
SPIRVType *getOpTypeStruct(const StructType *Ty, MachineIRBuilder &MIRBuilder,
SPIRV::AccessQualifier::AccessQualifier AccQual,
- bool EmitIR);
+ bool ExplicitLayoutRequired, bool EmitIR);
SPIRVType *getOpTypePointer(SPIRV::StorageClass::StorageClass SC,
SPIRVType *ElemType, MachineIRBuilder &MIRBuilder,
@@ -475,6 +494,12 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
MachineIRBuilder &MIRBuilder,
SPIRV::StorageClass::StorageClass SC);
+ void addStructOffsetDecorations(Register Reg, StructType *Ty,
+ MachineIRBuilder &MIRBuilder);
+ void addArrayStrideDecorations(Register Reg, Type *ElementType,
+ MachineIRBuilder &MIRBuilder);
+ bool hasBlockDecoration(SPIRVType *Type) const;
+
public:
Register buildConstantInt(uint64_t Val, MachineIRBuilder &MIRBuilder,
SPIRVType *SpvType, bool EmitIR,
@@ -545,9 +570,6 @@ class SPIRVGlobalRegistry : public SPIRVIRMapping {
SPIRVType *getOrCreateSPIRVVectorType(SPIRVType *BaseType,
unsigned NumElements, MachineInstr &I,
const SPIRVInstrInfo &TII);
- SPIRVType *getOrCreateSPIRVArrayType(SPIRVType *BaseType,
- unsigned NumElements, MachineInstr &I,
- const SPIRVInstrInfo &TII);
// Returns a pointer to a SPIR-V pointer type with the given base type and
// storage class. The base type will be translated to a SPIR-V type, and the
diff --git a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h
index 9c9c099bc5fc4..a329fd5ed9d29 100644
--- a/llvm/lib/Target/SPIRV/SPIRVIRMapping.h
+++ b/llvm/lib/Target/SPIRV/SPIRVIRMapping.h
@@ -66,6 +66,7 @@ enum SpecialTypeKind {
STK_Value,
STK_MachineInstr,
STK_VkBuffer,
+ STK_ExplictLayoutType,
STK_Last = -1
};
@@ -150,6 +151,11 @@ inline IRHandle irhandle_vkbuffer(const Type *ElementType,
SpecialTypeKind::STK_VkBuffer);
}
+inline IRHandle irhandle_explict_layout_type(const Type *Ty) {
+ const Type *WrpTy = unifyPtrType(Ty);
+ return irhandle_ptr(WrpTy, Ty->getTypeID(), STK_ExplictLayoutType);
+}
+
inline IRHandle handle(const Type *Ty) {
const Type *WrpTy = unifyPtrType(Ty);
return irhandle_ptr(WrpTy, Ty->getTypeID(), STK_Type);
@@ -163,6 +169,10 @@ inline IRHandle handle(const MachineInstr *KeyMI) {
return irhandle_ptr(KeyMI, SPIRV::to_hash(KeyMI), STK_MachineInstr);
}
+inline bool type_has_layout_decoration(const Type *T) {
+ return (isa<StructType>(T) || isa<ArrayType>(T));
+}
+
} // namespace SPIRV
// Bi-directional mappings between LLVM entities and (v-reg, machine function)
@@ -238,14 +248,49 @@ class SPIRVIRMapping {
return findMI(SPIRV::irhandle_pointee(PointeeTy, AddressSpace), MF);
}
- template <typename T> bool add(const T *Obj, const MachineInstr *MI) {
+ bool add(const Value *V, const MachineInstr *MI) {
+ return add(SPIRV::handle(V), MI);
+ }
+
+ bool add(const Type *T, bool RequiresExplicitLayout, const MachineInstr *MI) {
+ if (RequiresExplicitLayout && SPIRV::type_has_layout_decoration(T)) {
+ return add(SPIRV::irhandle_explict_layout_type(T), MI);
+ }
+ return add(SPIRV::handle(T), MI);
+ }
+
+ bool add(const MachineInstr *Obj, const MachineInstr *MI) {
return add(SPIRV::handle(Obj), MI);
}
- template <typename T> Register find(const T *Obj, const MachineFunction *MF) {
- return find(SPIRV::handle(Obj), MF);
+
+ Register find(const Value *V, const MachineFunction *MF) {
+ return find(SPIRV::handle(V), MF);
+ }
+
+ Register find(const Type *T, bool RequiresExplicitLayout,
+ const MachineFunction *MF) {
+ if (RequiresExplicitLayout && SPIRV::type_has_layout_decoration(T))
+ return find(SPIRV::irhandle_explict_layout_type(T), MF);
+ return find(SPIRV::handle(T), MF);
+ }
+
+ Register find(const MachineInstr *MI, const MachineFunction *MF) {
+ return find(SPIRV::handle(MI), MF);
+ }
+
+ const MachineInstr *findMI(const Value *Obj, const MachineFunction *MF) {
+ return findMI(SPIRV::handle(Obj), MF);
+ }
+
+ const MachineInstr *findMI(const Type *T, bool RequiresExplicitLayout,
+ const MachineFunction *MF) {
+ if (RequiresExplicitLayout && SPIRV::type_has_layout_decoration(T))
+ return findMI(SPIRV::irhandle_explict_layout_type(T), MF);
+ return findMI(SPIRV::handle(T), MF);
}
- template <typename T>
- const MachineInstr *findMI(const T *Obj, const MachineFunction *MF) {
+
+ const MachineInstr *findMI(const MachineInstr *Obj,
+ const MachineFunction *MF) {
return findMI(SPIRV::handle(Obj), MF);
}
};
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index 216c3e26be1bf..8a873426e78d8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -25,6 +25,42 @@
using namespace llvm;
+// Returns true if the types logically match, as defined in
+// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpCopyLogical.
+static bool typesLogicallyMatch(const SPIRVType *Ty1, const SPIRVType *Ty2,
+ SPIRVGlobalRegistry &GR) {
+ if (Ty1->getOpcode() != Ty2->getOpcode())
+ return false;
+
+ if (Ty1->getNumOperands() != Ty2->getNumOperands())
+ return false;
+
+ if (Ty1->getOpcode() == SPIRV::OpTypeArray) {
+    // Arrays must have the same size.
+ if (Ty1->getOperand(2).getReg() != Ty2->getOperand(2).getReg())
+ return false;
+
+ SPIRVType *ElemType1 = GR.getSPIRVTypeForVReg(Ty1->getOperand(1).getReg());
+ SPIRVType *ElemType2 = GR.getSPIRVTypeForVReg(Ty2->getOperand(1).getReg());
+ return ElemType1 == ElemType2 ||
+ typesLogicallyMatch(ElemType1, ElemType2, GR);
+ }
+
+ if (Ty1->getOpcode() == SPIRV::OpTypeStruct) {
+ for (unsigned I = 1; I < Ty1->getNumOperands(); I++) {
+ SPIRVType *ElemType1 =
+ GR.getSPIRVTypeForVReg(Ty1->getOperand(I).getReg());
+ SPIRVType *ElemType2 =
+ GR.getSPIRVTypeForVReg(Ty2->getOperand(I).getReg());
+ if (ElemType1 != ElemType2 &&
+ !typesLogicallyMatch(ElemType1, ElemType2, GR))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
unsigned SPIRVTargetLowering::getNumRegistersForCallingConv(
LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
// This code avoids CallLowering fail inside getVectorTypeBreakdown
@@ -374,6 +410,9 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
// implies that %Op is a pointer to <ResType>
case SPIRV::OpLoad:
// OpLoad <ResType>, ptr %Op implies that %Op is a pointer to <ResType>
+ if (enforcePtrTypeCompatibility(MI, 2, 0))
+ break;
+
validatePtrTypes(STI, MRI, GR, MI, 2,
GR.getSPIRVTypeForVReg(MI.getOperand(0).getReg()));
break;
@@ -531,3 +570,58 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const {
ProcessedMF.insert(&MF);
TargetLowering::finalizeLowering(MF);
}
+
+// Modifies either operand PtrOpIdx or OpIdx so that the pointee type of
+// PtrOpIdx matches the type for operand OpIdx. Returns true if they already
+// match or if the instruction was modified to make them match.
+bool SPIRVTargetLowering::enforcePtrTypeCompatibility(
+ MachineInstr &I, unsigned int PtrOpIdx, unsigned int OpIdx) const {
+ SPIRVGlobalRegistry &GR = *STI.getSPIRVGlobalRegistry();
+ SPIRVType *PtrType = GR.getResultType(I.getOperand(PtrOpIdx).getReg());
+ SPIRVType *PointeeType = GR.getPointeeType(PtrType);
+ SPIRVType *OpType = GR.getResultType(I.getOperand(OpIdx).getReg());
+
+ if (PointeeType == OpType)
+ return true;
+
+ if (typesLogicallyMatch(PointeeType, OpType, GR)) {
+ // Apply OpCopyLogical to OpIdx.
+ if (I.getOperand(OpIdx).isDef() &&
+ insertLogicalCopyOnResult(I, PointeeType)) {
+ return true;
+ }
+
+ llvm_unreachable("Unable to add OpCopyLogical yet.");
+ return false;
+ }
+
+ return false;
+}
+
+bool SPIRVTargetLowering::insertLogicalCopyOnResult(
+ MachineInstr &I, SPIRVType *NewResultType) const {
+ MachineRegisterInfo *MRI = &I.getMF()->getRegInfo();
+ SPIRVGlobalRegistry &GR = *STI.getSPIRVGlobalRegistry();
+
+ Register NewResultReg =
+ createVirtualRegister(NewResultType, &GR, MRI, *I.getMF());
+ Register NewTypeReg = GR.getSPIRVTypeID(NewResultType);
+
+ assert(std::distance(I.defs().begin(), I.defs().end()) == 1 &&
+ "Expected only one def");
+ MachineOperand &OldResult = *I.defs().begin();
+ Register OldResultReg = OldResult.getReg();
+ MachineOperand &OldType = *I.uses().begin();
+ Register OldTypeReg = OldType.getReg();
+
+ OldResult.setReg(NewResultReg);
+ OldType.setReg(NewTypeReg);
+
+ MachineIRBuilder MIB(*I.getNextNode());
+ return MIB.buildInstr(SPIRV::OpCopyLogical)
+ .addDef(OldResultReg)
+ .addUse(OldTypeReg)
+ .addUse(NewResultReg)
+ .constrainAllUses(*STI.getInstrInfo(), *STI.getRegisterInfo(),
+ *STI.getRegBankInfo());
+}
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
index eb78299b72f04..9025e6eb0842e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h
@@ -71,6 +71,11 @@ class SPIRVTargetLowering : public TargetLowering {
EVT ConditionVT) const override {
return ConditionVT.getSimpleVT();
}
+
+ bool enforcePtrTypeCompatibility(MachineInstr &I, unsigned PtrOpIdx,
+ unsigned OpIdx) const;
+ bool insertLogicalCopyOnResult(MachineInstr &I,
+ SPIRVType *NewResultType) const;
};
} // namespace llvm
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StructuredBuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StructuredBuffer.ll
index fc8faa7300534..f539fdefa3fa2 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-resources/StructuredBuffer.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StructuredBuffer.ll
@@ -11,17 +11,18 @@ declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handle
; CHECK: OpDecorate [[BufferVar:%.+]] DescriptorSet 0
; CHECK: OpDecorate [[BufferVar]] Binding 0
-; CHECK: OpDecorate [[BufferType:%.+]] Block
-; CHECK: OpMemberDecorate [[BufferType]] 0 Offset 0
+; CHECK: OpMemberDecorate [[BufferType:%.+]] 0 Offset 0
+; CHECK: OpDecorate [[BufferType]] Block
; CHECK: OpMemberDecorate [[BufferType]] 0 NonWritable
; CHECK: OpDecorate [[RWBufferVar:%.+]] DescriptorSet 0
; CHECK: OpDecorate [[RWBufferVar]] Binding 1
-; CHECK: OpDecorate [[RWBufferType:%.+]] Block
-; CHECK: OpMemberDecorate [[RWBufferType]] 0 Offset 0
+; CHECK: OpDecorate [[ArrayType:%.+]] ArrayStride 4
+; CHECK: OpMemberDecorate [[RWBufferType:%.+]] 0 Offset 0
+; CHECK: OpDecorate [[RWBufferType]] Block
; CHECK: [[int:%[0-9]+]] = OpTypeInt 32 0
-; CHECK: [[ArrayType:%.+]] = OpTypeRuntimeArray
+; CHECK: [[ArrayType]] = OpTypeRuntimeArray
; CHECK: [[RWBufferType]] = OpTypeStruct [[ArrayType]]
; CHECK: [[RWBufferPtrType:%.+]] = OpTypePointer StorageBuffer [[RWBufferType]]
; CHECK: [[BufferType]] = OpTypeStruct [[ArrayType]]
diff --git a/llvm/test/CodeGen/SPIRV/spirv-explicit-layout.ll b/llvm/test/CodeGen/SPIRV/spirv-explicit-layout.ll
new file mode 100644
index 0000000000000..7303471c9929c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/spirv-explicit-layout.ll
@@ -0,0 +1,149 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv1.6-vulkan1.3-library %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.6-vulkan1.3-library %s -o - -filetype=obj | spirv-val %}
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
+
+; CHECK-DAG: OpName [[ScalarBlock_var:%[0-9]+]] "__resource_p_12_{_u32[0]}_0_0"
+; CHECK-DAG: OpName [[buffer_var:%[0-9]+]] "__resource_p_12_{_{_{_u32_f32[3]}[10]}[0]}_0_0"
+; CHECK-DAG: OpName [[array_buffer_var:%[0-9]+]] "__resource_p_12_{_{_{_u32_f32[3]}[10]}[0]}[10]_0_0"
+
+; CHECK-DAG: OpMemberDecorate [[ScalarBlock:%[0-9]+]] 0 Offset 0
+; CHECK-DAG: OpDecorate [[ScalarBlock]] Block
+; CHECK-DAG: OpMemberDecorate [[ScalarBlock]] 0 NonWritable
+; CHECK-DAG: OpMemberDecorate [[T_explicit:%[0-9]+]] 0 Offset 0
+; CHECK-DAG: OpMemberDecorate [[T_explicit]] 1 Offset 16
+; CHECK-DAG: OpDecorate [[T_array_explicit:%[0-9]+]] ArrayStride 32
+; CHECK-DAG: OpMemberDecorate [[S_explicit:%[0-9]+]] 0 Offset 0
+; CHECK-DAG: OpDecorate [[S_array_explicit:%[0-9]+]] ArrayStride 320
+; CHECK-DAG: OpMemberDecorate [[block:%[0-9]+]] 0 Offset 0
+; CHECK-DAG: OpDecorate [[block]] Block
+; CHECK-DAG: OpMemberDecorate [[block]] 0 NonWritable
+
+; CHECK-DAG: [[float:%[0-9]+]] = OpTypeFloat 32
+; CHECK-DAG: [[v3f:%[0-9]+]] = OpTypeVector [[float]] 3
+; CHECK-DAG: [[uint:%[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG: [[T:%[0-9]+]] = OpTypeStruct [[uint]] [[v3f]]
+; CHECK-DAG: [[T_explicit]] = OpTypeStruct [[uint]] [[v3f]]
+%struct.T = type { i32, <3 x float> }
+
+; CHECK-DAG: [[zero:%[0-9]+]] = OpConstant [[uint]] 0{{$}}
+; CHECK-DAG: [[one:%[0-9]+]] = OpConstant [[uint]] 1{{$}}
+; CHECK-DAG: [[ten:%[0-9]+]] = OpConstant [[uint]] 10
+; CHECK-DAG: [[T_array:%[0-9]+]] = OpTypeArray [[T]] [[ten]]
+; CHECK-DAG: [[S:%[0-9]+]] = OpTypeStruct [[T_array]]
+; CHECK-DAG: [[T_array_explicit]] = OpTypeArray [[T_explicit]] [[ten]]
+; CHECK-DAG: [[S_explicit]] = OpTypeStruct [[T_array_explicit]]
+%struct.S = type { [10 x %struct.T] }
+
+; CHECK-DAG: [[private_S_ptr:%[0-9]+]] = OpTypePointer Private [[S]]
+; CHECK-DAG: [[private_var:%[0-9]+]] = OpVariable [[private_S_ptr]] Private
+ at private = internal addrspace(10) global %struct.S poison
+
+; CHECK-DAG: [[storagebuffer_S_ptr:%[0-9]+]] = OpTypePointer StorageBuffer [[S_explicit]]
+; CHECK-DAG: [[storage_buffer:%[0-9]+]] = OpVariable [[storagebuffer_S_ptr]] StorageBuffer
+ at storage_buffer = internal addrspace(11) global %struct.S poison
+
+; CHECK-DAG: [[storagebuffer_int_ptr:%[0-9]+]] = OpTypePointer StorageBuffer [[uint]]
+; CHECK-DAG: [[ScalarBlock_array:%[0-9]+]] = OpTypeRuntimeArray [[uint]]
+; CHECK-DAG: [[ScalarBlock]] = OpTypeStruct [[ScalarBlock_array]]
+; CHECK-DAG: [[ScalarBlock_ptr:%[0-9]+]] = OpTypePointer StorageBuffer [[ScalarBlock]]
+; CHECK-DAG: [[ScalarBlock_var]] = OpVariable [[ScalarBlock_ptr]] StorageBuffer
+
+
+; CHECK-DAG: [[S_array_explicit]] = OpTypeRuntimeArray [[S_explicit]]
+; CHECK-DAG: [[block]] = OpTypeStruct [[S_array_explicit]]
+; CHECK-DAG: [[buffer_ptr:%[0-9]+]] = OpTypePointer StorageBuffer [[block]]
+; CHECK-DAG: [[buffer_var]] = OpVariable [[buffer_ptr]] StorageBuffer
+
+; CHECK-DAG: [[array_buffer:%[0-9]+]] = OpTypeArray [[block]] [[ten]]
+; CHECK-DAG: [[array_buffer_ptr:%[0-9]+]] = OpTypePointer StorageBuffer [[array_buffer]]
+; CHECK-DAG: [[array_buffer_var]] = OpVariable [[array_buffer_ptr]] StorageBuffer
+
+; CHECK: OpFunction [[uint]] None
+define external i32 @scalar_vulkan_buffer_load() {
+; CHECK-NEXT: OpLabel
+entry:
+; CHECK-NEXT: [[handle:%[0-9]+]] = OpCopyObject [[ScalarBlock_ptr]] [[ScalarBlock_var]]
+ %handle = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+; CHECK-NEXT: [[ptr:%[0-9]+]] = OpAccessChain [[storagebuffer_int_ptr]] [[handle]] [[zero]] [[one]]
+ %0 = tail call noundef nonnull align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer(target("spirv.VulkanBuffer", [0 x i32], 12, 0) %handle, i32 1)
+
+; CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad [[uint]] [[ptr]] Aligned 4
+ %1 = load i32, ptr addrspace(11) %0, align 4
+
+; CHECK-NEXT: OpReturnValue [[ld]]
+ ret i32 %1
+
+; CHECK-NEXT: OpFunctionEnd
+}
+
+; CHECK: OpFunction [[S]] None
+define external %struct.S @private_load() {
+; CHECK-NEXT: OpLabel
+entry:
+
+; CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad [[S]] [[private_var]] Aligned 4
+ %1 = load %struct.S, ptr addrspace(10) @private, align 4
+
+; CHECK-NEXT: OpReturnValue [[ld]]
+ ret %struct.S %1
+
+; CHECK-NEXT: OpFunctionEnd
+}
+
+; CHECK: OpFunction [[S]] None
+define external %struct.S @storage_buffer_load() {
+; CHECK-NEXT: OpLabel
+entry:
+
+; CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad [[S_explicit]] [[storage_buffer]] Aligned 4
+; CHECK-NEXT: [[copy:%[0-9]+]] = OpCopyLogical [[S]] [[ld]]
+ %1 = load %struct.S, ptr addrspace(11) @storage_buffer, align 4
+
+; CHECK-NEXT: OpReturnValue [[copy]]
+ ret %struct.S %1
+
+; CHECK-NEXT: OpFunctionEnd
+}
+
+; CHECK: OpFunction [[S]] None
+define external %struct.S @vulkan_buffer_load() {
+; CHECK-NEXT: OpLabel
+entry:
+; CHECK-NEXT: [[handle:%[0-9]+]] = OpCopyObject [[buffer_ptr]] [[buffer_var]]
+ %handle = tail call target("spirv.VulkanBuffer", [0 x %struct.S], 12, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+; CHECK-NEXT: [[ptr:%[0-9]+]] = OpAccessChain [[storagebuffer_S_ptr]] [[handle]] [[zero]] [[one]]
+ %0 = tail call noundef nonnull align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 0) %handle, i32 1)
+
+; CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad [[S_explicit]] [[ptr]] Aligned 4
+; CHECK-NEXT: [[copy:%[0-9]+]] = OpCopyLogical [[S]] [[ld]]
+ %1 = load %struct.S, ptr addrspace(11) %0, align 4
+
+; CHECK-NEXT: OpReturnValue [[copy]]
+ ret %struct.S %1
+
+; CHECK-NEXT: OpFunctionEnd
+}
+
+; CHECK: OpFunction [[S]] None
+define external %struct.S @array_of_vulkan_buffers_load() {
+; CHECK-NEXT: OpLabel
+entry:
+; CHECK-NEXT: [[h:%[0-9]+]] = OpAccessChain [[buffer_ptr]] [[array_buffer_var]] [[one]]
+; CHECK-NEXT: [[handle:%[0-9]+]] = OpCopyObject [[buffer_ptr]] [[h]]
+ %handle = tail call target("spirv.VulkanBuffer", [0 x %struct.S], 12, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 1, i1 false)
+
+; CHECK-NEXT: [[ptr:%[0-9]+]] = OpAccessChain [[storagebuffer_S_ptr]] [[handle]] [[zero]] [[one]]
+ %0 = tail call noundef nonnull align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer(target("spirv.VulkanBuffer", [0 x %struct.S], 12, 0) %handle, i32 1)
+
+; CHECK-NEXT: [[ld:%[0-9]+]] = OpLoad [[S_explicit]] [[ptr]] Aligned 4
+; CHECK-NEXT: [[copy:%[0-9]+]] = OpCopyLogical [[S]] [[ld]]
+ %1 = load %struct.S, ptr addrspace(11) %0, align 4
+
+; CHECK-NEXT: OpReturnValue [[copy]]
+ ret %struct.S %1
+
+; CHECK-NEXT: OpFunctionEnd
+}
>From f8afefa05a15455bb7a84d5cd9b18ad88913d6de Mon Sep 17 00:00:00 2001
From: Henrich Lauko <xlauko at mail.muni.cz>
Date: Wed, 7 May 2025 19:31:18 +0200
Subject: [PATCH 021/115] [CIR] Remove implicit options from tablegen files
(#138860)
This mirrors incubator changes from https://github.com/llvm/clangir/pull/1602
---
clang/include/clang/CIR/Dialect/IR/CIROps.td | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 8d01db03cb3fa..7ffa10464dcd3 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -384,8 +384,6 @@ def AllocaOp : CIR_Op<"alloca", [
`]`
($annotations^)? attr-dict
}];
-
- let hasVerifier = 0;
}
//===----------------------------------------------------------------------===//
@@ -1514,9 +1512,6 @@ def TernaryOp : CIR_Op<"ternary",
>
];
- // All constraints already verified elsewhere.
- let hasVerifier = 0;
-
let assemblyFormat = [{
`(` $cond `,`
`true` $trueRegion `,`
@@ -1654,9 +1649,6 @@ def GetGlobalOp : CIR_Op<"get_global",
let assemblyFormat = [{
$name `:` qualified(type($addr)) attr-dict
}];
-
- // `GetGlobalOp` is fully verified by its traits.
- let hasVerifier = 0;
}
//===----------------------------------------------------------------------===//
@@ -1839,7 +1831,6 @@ class CIR_CallOpBase<string mnemonic, list<Trait> extra_traits = []>
let hasCustomAssemblyFormat = 1;
let skipDefaultBuilders = 1;
- let hasVerifier = 0;
// TODO(cir): for now cir.call is just a tiny shell of what it will become.
// More attributes, arguments, and properties will be added in the future as
>From 4ff9db68a30b3298be2e56abc47c92d3412e1543 Mon Sep 17 00:00:00 2001
From: Alejandro Colomar <foss+github at alejandro-colomar.es>
Date: Wed, 7 May 2025 19:33:40 +0200
Subject: [PATCH 022/115] Add more tests for _Countof (#133333)
Link: <https://github.com/llvm/llvm-project/issues/102836>
Link: <https://github.com/llvm/llvm-project/pull/133125>
---
clang/test/C/C2y/n3369.c | 52 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 51 insertions(+), 1 deletion(-)
diff --git a/clang/test/C/C2y/n3369.c b/clang/test/C/C2y/n3369.c
index 389828b52b6a2..db26040d8cf44 100644
--- a/clang/test/C/C2y/n3369.c
+++ b/clang/test/C/C2y/n3369.c
@@ -17,7 +17,11 @@
#error "Expected to have _Countof support"
#endif
+#define NULL ((void *) 0)
+
int global_array[12];
+int global_multi_array[12][34];
+int global_num;
void test_parsing_failures() {
(void)_Countof; // expected-error {{expected expression}}
@@ -36,6 +40,12 @@ void test_semantic_failures() {
expected-note {{forward declaration of 'struct S'}}
struct T { int x; };
(void)_Countof(struct T); // expected-error {{'_Countof' requires an argument of array type; 'struct T' invalid}}
+ struct U { int x[3]; };
+ (void)_Countof(struct U); // expected-error {{'_Countof' requires an argument of array type; 'struct U' invalid}}
+ int a[3];
+ (void)_Countof(&a); // expected-error {{'_Countof' requires an argument of array type; 'int (*)[3]' invalid}}
+ int *p;
+ (void)_Countof(p); // expected-error {{'_Countof' requires an argument of array type; 'int *' invalid}}
}
void test_constant_expression_behavior(int n) {
@@ -81,6 +91,22 @@ void test_with_function_param(int array[12], int (*array_ptr)[12], int static_ar
(void)_Countof(static_array); // expected-error {{'_Countof' requires an argument of array type; 'int *' invalid}}
}
+void test_func_fix_fix(int i, char (*a)[3][5], int (*x)[_Countof(*a)], char (*)[_Generic(x, int (*)[3]: 1)]); // expected-note {{passing argument to parameter}}
+void test_func_fix_var(int i, char (*a)[3][i], int (*x)[_Countof(*a)], char (*)[_Generic(x, int (*)[3]: 1)]); // expected-note {{passing argument to parameter}}
+void test_func_fix_uns(int i, char (*a)[3][*], int (*x)[_Countof(*a)], char (*)[_Generic(x, int (*)[3]: 1)]); // expected-note {{passing argument to parameter}}
+
+void test_funcs() {
+ int i3[3];
+ int i5[5];
+ char c35[3][5];
+ test_func_fix_fix(5, &c35, &i3, NULL);
+ test_func_fix_fix(5, &c35, &i5, NULL); // expected-warning {{incompatible pointer types passing 'int (*)[5]' to parameter of type 'int (*)[3]'}}
+ test_func_fix_var(5, &c35, &i3, NULL);
+ test_func_fix_var(5, &c35, &i5, NULL); // expected-warning {{incompatible pointer types passing 'int (*)[5]' to parameter of type 'int (*)[3]'}}
+ test_func_fix_uns(5, &c35, &i3, NULL);
+ test_func_fix_uns(5, &c35, &i5, NULL); // expected-warning {{incompatible pointer types passing 'int (*)[5]' to parameter of type 'int (*)[3]'}}
+}
+
void test_multidimensional_arrays() {
int array[12][7];
static_assert(_Countof(array) == 12);
@@ -102,6 +128,11 @@ void test_unspecified_array_length() {
static_assert(_Countof(**x) == 3);
}
+void test_completed_array() {
+ int a[] = {1, 2, global_num};
+ static_assert(_Countof(a) == 3);
+}
+
// Test that the return type of _Countof is what you'd expect (size_t).
void test_return_type() {
static_assert(_Generic(typeof(_Countof global_array), typeof(sizeof(0)) : 1, default : 0));
@@ -121,10 +152,14 @@ void test_typedefs() {
static_assert(_Countof(*x) == 12);
}
-void test_zero_size_arrays() {
+void test_zero_size_arrays(int n) {
int array[0]; // expected-warning {{zero size arrays are an extension}}
static_assert(_Countof(array) == 0);
static_assert(_Countof(int[0]) == 0); // expected-warning {{zero size arrays are an extension}}
+ int multi_array[0][n]; // FIXME: Should trigger -Wzero-length-array
+ static_assert(_Countof(multi_array) == 0);
+ int another_one[0][3]; // expected-warning {{zero size arrays are an extension}}
+ static_assert(_Countof(another_one) == 0);
}
void test_struct_members() {
@@ -144,3 +179,18 @@ void test_compound_literals() {
static_assert(_Countof((int[2]){}) == 2);
static_assert(_Countof((int[]){1, 2, 3, 4}) == 4);
}
+
+/* We don't get a diagnostic for test_f1(), because it ends up unused
+ * as _Countof() results in an integer constant expression, which is not
+ * evaluated. However, test_f2() ends up being evaluated, since 'a' is
+ * a VLA.
+ */
+static int test_f1();
+static int test_f2(); // FIXME: Should trigger function 'test_f2' has internal linkage but is not defined
+
+void test_symbols() {
+ int a[global_num][global_num];
+
+ static_assert(_Countof(global_multi_array[test_f1()]) == 34);
+ (void)_Countof(a[test_f2()]);
+}
>From d56f23e4083d8c5aaf123da0f3adfeda50e47417 Mon Sep 17 00:00:00 2001
From: Jason Eckhardt <jeckhardt at nvidia.com>
Date: Wed, 7 May 2025 12:36:17 -0500
Subject: [PATCH 023/115] [AsmParser] Replace starIsStartOfStatement with
tokenIsStartOfStatement. (#137997)
Currently `MCTargetAsmParser::starIsStartOfStatement` checks for `*` at
the start of the statement. There are other (currently) downstream
back-ends that need the same treatment for other tokens. Instead of
introducing bespoke APIs for each such token, we generalize (and rename)
starIsStartOfStatement to tokenIsStartOfStatement, which takes the token
of interest as an argument.
Update the BPF AsmParser (the only upstream consumer today) to use the
new version.
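
For illustration only (not part of this patch): a minimal standalone sketch of the
hook's shape. The names TokenKind, TargetAsmParserModel, BPFLikeParser and
AtLikeParser are stand-ins invented here; in the real tree the base class is
MCTargetAsmParser and the token kinds come from llvm::AsmToken.

  // Standalone C++ model of the generalized start-of-statement hook.
  #include <iostream>

  enum class TokenKind { Identifier, Star, At, Eof };

  struct TargetAsmParserModel {
    virtual ~TargetAsmParserModel() = default;
    // Default: no extra token may begin a statement.
    virtual bool tokenIsStartOfStatement(TokenKind) { return false; }
  };

  // BPF-style parser: '*' may start a statement (memory dereference).
  struct BPFLikeParser : TargetAsmParserModel {
    bool tokenIsStartOfStatement(TokenKind Token) override {
      return Token == TokenKind::Star;
    }
  };

  // Hypothetical downstream parser that accepts '@' instead.
  struct AtLikeParser : TargetAsmParserModel {
    bool tokenIsStartOfStatement(TokenKind Token) override {
      return Token == TokenKind::At;
    }
  };

  int main() {
    BPFLikeParser BPF;
    AtLikeParser At;
    std::cout << BPF.tokenIsStartOfStatement(TokenKind::Star) << '\n'; // 1
    std::cout << At.tokenIsStartOfStatement(TokenKind::Star) << '\n';  // 0
    std::cout << At.tokenIsStartOfStatement(TokenKind::At) << '\n';    // 1
  }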
---
llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h | 6 ++++--
llvm/lib/MC/MCParser/AsmParser.cpp | 6 ++----
llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp | 4 +++-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index c7f098be70945..c94ae9442f028 100644
--- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -508,8 +508,10 @@ class MCTargetAsmParser : public MCAsmParserExtension {
virtual bool equalIsAsmAssignment() { return true; };
// Return whether this start of statement identifier is a label
virtual bool isLabel(AsmToken &Token) { return true; };
- // Return whether this parser accept star as start of statement
- virtual bool starIsStartOfStatement() { return false; };
+ // Return whether this parser accepts the given token as start of statement.
+ virtual bool tokenIsStartOfStatement(AsmToken::TokenKind Token) {
+ return false;
+ }
virtual const MCExpr *applySpecifier(const MCExpr *E, uint32_t,
MCContext &Ctx) {
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index aee1259eeb126..f27a27833858a 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1769,11 +1769,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Treat '}' as a valid identifier in this context.
Lex();
IDVal = "}";
- } else if (Lexer.is(AsmToken::Star) &&
- getTargetParser().starIsStartOfStatement()) {
- // Accept '*' as a valid start of statement.
+ } else if (getTargetParser().tokenIsStartOfStatement(ID.getKind())) {
Lex();
- IDVal = "*";
+ IDVal = ID.getString();
} else if (parseIdentifier(IDVal)) {
if (!TheCondState.Ignore) {
Lex(); // always eat a token
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 494445fa89b5e..2e4819e5ede38 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -49,7 +49,9 @@ class BPFAsmParser : public MCTargetAsmParser {
bool equalIsAsmAssignment() override { return false; }
// "*" is used for dereferencing memory that it will be the start of
// statement.
- bool starIsStartOfStatement() override { return true; }
+ bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
+ return Token == AsmToken::Star;
+ }
#define GET_ASSEMBLER_HEADER
#include "BPFGenAsmMatcher.inc"
>From 3d71939dda0a3f6db929b57320342a41b3a2db96 Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane at nvidia.com>
Date: Wed, 7 May 2025 10:40:15 -0700
Subject: [PATCH 024/115] Propose new ClangIR Maintainers (#138870)
While I don't propose any change to the process we've been using for
ClangIR contributions, it is important that we have maintainers listed
so that folks have a good point of contact for the project upstream.
---------
Co-authored-by: Bruno Cardoso Lopes <bruno.cardoso at gmail.com>
---
clang/Maintainers.rst | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst
index dfe86a0773c16..8dfa1690ad846 100644
--- a/clang/Maintainers.rst
+++ b/clang/Maintainers.rst
@@ -51,6 +51,14 @@ Clang LLVM IR generation
| Anton Korobeynikov
| anton\@korobeynikov.info (email), asl (Phabricator), asl (GitHub)
+Clang MLIR generation
+~~~~~~~~~~~~~~~~~~~~~
+| Andy Kaylor
+| akaylor\@nvidia.com (email), AndyKaylor (Discord), AndyKaylor (GitHub)
+
+| Bruno Cardoso Lopes
+| bruno.cardoso\@gmail.com (email), sonicsprawl (Discord), bcardosolopes (GitHub)
+
Analysis & CFG
~~~~~~~~~~~~~~
>From bf5971634a9244fd65c1bf8316b3d6ec407783ae Mon Sep 17 00:00:00 2001
From: John Harrison <harjohn at google.com>
Date: Wed, 7 May 2025 10:55:11 -0700
Subject: [PATCH 025/115] [lldb-dap] Fix package.json after a bad merge.
(#138918)
The package.json is currently malformed after a bad merge in
39e6e888a8155583713e1b8b256119a2be7902e0.
---
lldb/tools/lldb-dap/package.json | 982 +++++++++++++++----------------
1 file changed, 491 insertions(+), 491 deletions(-)
diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json
index 4734c9d7277bb..f66badc2a930f 100644
--- a/lldb/tools/lldb-dap/package.json
+++ b/lldb/tools/lldb-dap/package.json
@@ -242,527 +242,527 @@
}
}
}
- ]
- },
- "breakpoints": [
- {
- "language": "ada"
- },
- {
- "language": "arm"
- },
- {
- "language": "asm"
- },
- {
- "language": "c"
- },
- {
- "language": "cpp"
- },
- {
- "language": "crystal"
- },
- {
- "language": "d"
- },
- {
- "language": "fortan"
- },
- {
- "language": "fortran-modern"
- },
- {
- "language": "nim"
- },
- {
- "language": "objective-c"
- },
- {
- "language": "objectpascal"
- },
- {
- "language": "pascal"
- },
- {
- "language": "rust"
- },
- {
- "language": "swift"
- }
- ],
- "debuggers": [
- {
- "type": "lldb-dap",
- "label": "LLDB DAP Debugger",
- "configurationAttributes": {
- "launch": {
- "required": [
- "program"
- ],
- "properties": {
- "debugAdapterHostname": {
- "type": "string",
- "markdownDescription": "The hostname that an existing lldb-dap executable is listening on."
- },
- "debugAdapterPort": {
- "type": "number",
- "markdownDescription": "The port that an existing lldb-dap executable is listening on."
- },
- "debugAdapterExecutable": {
- "type": "string",
- "markdownDescription": "The absolute path to the LLDB debug adapter executable to use. Overrides any user or workspace settings."
- },
- "debugAdapterArgs": {
- "type": "array",
- "items": {
- "type": "string"
+ ],
+ "breakpoints": [
+ {
+ "language": "ada"
+ },
+ {
+ "language": "arm"
+ },
+ {
+ "language": "asm"
+ },
+ {
+ "language": "c"
+ },
+ {
+ "language": "cpp"
+ },
+ {
+ "language": "crystal"
+ },
+ {
+ "language": "d"
+ },
+ {
+ "language": "fortan"
+ },
+ {
+ "language": "fortran-modern"
+ },
+ {
+ "language": "nim"
+ },
+ {
+ "language": "objective-c"
+ },
+ {
+ "language": "objectpascal"
+ },
+ {
+ "language": "pascal"
+ },
+ {
+ "language": "rust"
+ },
+ {
+ "language": "swift"
+ }
+ ],
+ "debuggers": [
+ {
+ "type": "lldb-dap",
+ "label": "LLDB DAP Debugger",
+ "configurationAttributes": {
+ "launch": {
+ "required": [
+ "program"
+ ],
+ "properties": {
+ "debugAdapterHostname": {
+ "type": "string",
+ "markdownDescription": "The hostname that an existing lldb-dap executable is listening on."
},
- "markdownDescription": "The list of additional arguments used to launch the debug adapter executable. Overrides any user or workspace settings."
- },
- "program": {
- "type": "string",
- "description": "Path to the program to debug."
- },
- "args": {
- "type": [
- "array"
- ],
- "items": {
- "type": "string"
- },
- "description": "Program arguments.",
- "default": []
- },
- "cwd": {
- "type": "string",
- "description": "Program working directory.",
- "default": "${workspaceRoot}"
- },
- "env": {
- "anyOf": [
- {
- "type": "object",
- "description": "Additional environment variables to set when launching the program. E.g. `{ \"FOO\": \"1\" }`",
- "patternProperties": {
- ".*": {
- "type": "string"
- }
- },
- "default": {}
+ "debugAdapterPort": {
+ "type": "number",
+ "markdownDescription": "The port that an existing lldb-dap executable is listening on."
+ },
+ "debugAdapterExecutable": {
+ "type": "string",
+ "markdownDescription": "The absolute path to the LLDB debug adapter executable to use. Overrides any user or workspace settings."
+ },
+ "debugAdapterArgs": {
+ "type": "array",
+ "items": {
+ "type": "string"
},
- {
- "type": "array",
- "description": "Additional environment variables to set when launching the program. E.g. `[\"FOO=1\", \"BAR\"]`",
- "items": {
- "type": "string",
- "pattern": "^((\\w+=.*)|^\\w+)$"
- },
- "default": []
- }
- ]
- },
- "stopOnEntry": {
- "type": "boolean",
- "description": "Automatically stop after launch.",
- "default": false
- },
- "disableASLR": {
- "type": "boolean",
- "description": "Enable or disable Address space layout randomization if the debugger supports it.",
- "default": true
- },
- "disableSTDIO": {
- "type": "boolean",
- "description": "Don't retrieve STDIN, STDOUT and STDERR as the program is running.",
- "default": false
- },
- "shellExpandArguments": {
- "type": "boolean",
- "description": "Expand program arguments as a shell would without actually launching the program in a shell.",
- "default": false
- },
- "detachOnError": {
- "type": "boolean",
- "description": "Detach from the program.",
- "default": false
- },
- "sourcePath": {
- "type": "string",
- "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths."
- },
- "sourceMap": {
- "anyOf": [
- {
- "type": "object",
- "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. Overrides sourcePath.",
- "patternProperties": {
- ".*": {
- "type": "string"
- }
- },
- "default": {}
+ "markdownDescription": "The list of additional arguments used to launch the debug adapter executable. Overrides any user or workspace settings."
+ },
+ "program": {
+ "type": "string",
+ "description": "Path to the program to debug."
+ },
+ "args": {
+ "type": [
+ "array"
+ ],
+ "items": {
+ "type": "string"
},
- {
- "type": "array",
- "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.",
- "items": {
+ "description": "Program arguments.",
+ "default": []
+ },
+ "cwd": {
+ "type": "string",
+ "description": "Program working directory.",
+ "default": "${workspaceRoot}"
+ },
+ "env": {
+ "anyOf": [
+ {
+ "type": "object",
+ "description": "Additional environment variables to set when launching the program. E.g. `{ \"FOO\": \"1\" }`",
+ "patternProperties": {
+ ".*": {
+ "type": "string"
+ }
+ },
+ "default": {}
+ },
+ {
"type": "array",
- "minItems": 2,
- "maxItems": 2,
+ "description": "Additional environment variables to set when launching the program. E.g. `[\"FOO=1\", \"BAR\"]`",
"items": {
- "type": "string"
- }
+ "type": "string",
+ "pattern": "^((\\w+=.*)|^\\w+)$"
+ },
+ "default": []
+ }
+ ]
+ },
+ "stopOnEntry": {
+ "type": "boolean",
+ "description": "Automatically stop after launch.",
+ "default": false
+ },
+ "disableASLR": {
+ "type": "boolean",
+ "description": "Enable or disable Address space layout randomization if the debugger supports it.",
+ "default": true
+ },
+ "disableSTDIO": {
+ "type": "boolean",
+ "description": "Don't retrieve STDIN, STDOUT and STDERR as the program is running.",
+ "default": false
+ },
+ "shellExpandArguments": {
+ "type": "boolean",
+ "description": "Expand program arguments as a shell would without actually launching the program in a shell.",
+ "default": false
+ },
+ "detachOnError": {
+ "type": "boolean",
+ "description": "Detach from the program.",
+ "default": false
+ },
+ "sourcePath": {
+ "type": "string",
+ "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths."
+ },
+ "sourceMap": {
+ "anyOf": [
+ {
+ "type": "object",
+ "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. Overrides sourcePath.",
+ "patternProperties": {
+ ".*": {
+ "type": "string"
+ }
+ },
+ "default": {}
},
- "default": []
- }
- ]
- },
- "debuggerRoot": {
- "type": "string",
- "description": "Specify a working directory to set the debug adapter to so relative object files can be located."
- },
- "targetTriple": {
- "type": "string",
- "description": "Triplet of the target architecture to override value derived from the program file."
- },
- "platformName": {
- "type": "string",
- "description": "Name of the execution platform to override value derived from the program file."
- },
- "initCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ {
+ "type": "array",
+ "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.",
+ "items": {
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": {
+ "type": "string"
+ }
+ },
+ "default": []
+ }
+ ]
},
- "description": "Initialization commands executed upon debugger startup.",
- "default": []
- },
- "preRunCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "debuggerRoot": {
+ "type": "string",
+ "description": "Specify a working directory to set the debug adapter to so relative object files can be located."
},
- "description": "Commands executed just before the program is launched.",
- "default": []
- },
- "postRunCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "targetTriple": {
+ "type": "string",
+ "description": "Triplet of the target architecture to override value derived from the program file."
},
- "description": "Commands executed just as soon as the program is successfully launched when it's in a stopped state prior to any automatic continuation.",
- "default": []
- },
- "launchCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "platformName": {
+ "type": "string",
+ "description": "Name of the execution platform to override value derived from the program file."
},
- "description": "Custom commands that are executed instead of launching a process. A target will be created with the launch arguments prior to executing these commands. The commands may optionally create a new target and must perform a launch. A valid process must exist after these commands complete or the \"launch\" will fail. Launch the process with \"process launch -s\" to make the process to at the entry point since lldb-dap will auto resume if necessary.",
- "default": []
- },
- "stopCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "initCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Initialization commands executed upon debugger startup.",
+ "default": []
},
- "description": "Commands executed each time the program stops.",
- "default": []
- },
- "exitCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "preRunCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed just before the program is launched.",
+ "default": []
},
- "description": "Commands executed when the program exits.",
- "default": []
- },
- "terminateCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "postRunCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed just as soon as the program is successfully launched when it's in a stopped state prior to any automatic continuation.",
+ "default": []
},
- "description": "Commands executed when the debugging session ends.",
- "default": []
- },
- "runInTerminal": {
- "type": "boolean",
- "description": "Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs",
- "default": false
- },
- "timeout": {
- "type": "number",
- "description": "The time in seconds to wait for a program to stop at entry point when launching with \"launchCommands\". Defaults to 30 seconds."
- },
- "enableAutoVariableSummaries": {
- "type": "boolean",
- "description": "Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables.",
- "default": false
- },
- "displayExtendedBacktrace": {
- "type": "boolean",
- "description": "Enable language specific extended backtraces.",
- "default": false
- },
- "enableSyntheticChildDebugging": {
- "type": "boolean",
- "description": "If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable.",
- "default": false
- },
- "commandEscapePrefix": {
- "type": "string",
- "description": "The escape prefix to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a back-tick (`). If it's an empty string, then all expression in the Debug Console are treated as regular LLDB commands.",
- "default": "`"
- },
- "customFrameFormat": {
- "type": "string",
- "description": "If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
- "default": ""
- },
- "customThreadFormat": {
- "type": "string",
- "description": "If non-empty, threads will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for threads. If the format string contains errors, an error message will be displayed on the Debug Console and the default thread names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
- "default": ""
+ "launchCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Custom commands that are executed instead of launching a process. A target will be created with the launch arguments prior to executing these commands. The commands may optionally create a new target and must perform a launch. A valid process must exist after these commands complete or the \"launch\" will fail. Launch the process with \"process launch -s\" to make the process to at the entry point since lldb-dap will auto resume if necessary.",
+ "default": []
+ },
+ "stopCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed each time the program stops.",
+ "default": []
+ },
+ "exitCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed when the program exits.",
+ "default": []
+ },
+ "terminateCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed when the debugging session ends.",
+ "default": []
+ },
+ "runInTerminal": {
+ "type": "boolean",
+ "description": "Launch the program inside an integrated terminal in the IDE. Useful for debugging interactive command line programs",
+ "default": false
+ },
+ "timeout": {
+ "type": "number",
+ "description": "The time in seconds to wait for a program to stop at entry point when launching with \"launchCommands\". Defaults to 30 seconds."
+ },
+ "enableAutoVariableSummaries": {
+ "type": "boolean",
+ "description": "Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables.",
+ "default": false
+ },
+ "displayExtendedBacktrace": {
+ "type": "boolean",
+ "description": "Enable language specific extended backtraces.",
+ "default": false
+ },
+ "enableSyntheticChildDebugging": {
+ "type": "boolean",
+ "description": "If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable.",
+ "default": false
+ },
+ "commandEscapePrefix": {
+ "type": "string",
+ "description": "The escape prefix to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a back-tick (`). If it's an empty string, then all expression in the Debug Console are treated as regular LLDB commands.",
+ "default": "`"
+ },
+ "customFrameFormat": {
+ "type": "string",
+ "description": "If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
+ "default": ""
+ },
+ "customThreadFormat": {
+ "type": "string",
+ "description": "If non-empty, threads will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for threads. If the format string contains errors, an error message will be displayed on the Debug Console and the default thread names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
+ "default": ""
+ }
}
- }
- },
- "attach": {
- "properties": {
- "debugAdapterHostname": {
- "type": "string",
- "markdownDescription": "The hostname that an existing lldb-dap executable is listening on."
- },
- "debugAdapterPort": {
- "type": "number",
- "markdownDescription": "The port that an existing lldb-dap executable is listening on."
- },
- "debugAdapterExecutable": {
- "type": "string",
- "markdownDescription": "The absolute path to the LLDB debug adapter executable to use. Overrides any user or workspace settings."
- },
- "debugAdapterArgs": {
- "type": "array",
- "items": {
- "type": "string"
+ },
+ "attach": {
+ "properties": {
+ "debugAdapterHostname": {
+ "type": "string",
+ "markdownDescription": "The hostname that an existing lldb-dap executable is listening on."
},
- "markdownDescription": "The list of additional arguments used to launch the debug adapter executable. Overrides any user or workspace settings."
- },
- "program": {
- "type": "string",
- "description": "Path to the program to attach to."
- },
- "pid": {
- "type": [
- "number",
- "string"
- ],
- "description": "System process ID to attach to."
- },
- "waitFor": {
- "type": "boolean",
- "description": "If set to true, then wait for the process to launch by looking for a process with a basename that matches `program`. No process ID needs to be specified when using this flag.",
- "default": true
- },
- "sourcePath": {
- "type": "string",
- "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths."
- },
- "sourceMap": {
- "anyOf": [
- {
- "type": "object",
- "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. Overrides sourcePath.",
- "patternProperties": {
- ".*": {
- "type": "string"
- }
- },
- "default": {}
+ "debugAdapterPort": {
+ "type": "number",
+ "markdownDescription": "The port that an existing lldb-dap executable is listening on."
+ },
+ "debugAdapterExecutable": {
+ "type": "string",
+ "markdownDescription": "The absolute path to the LLDB debug adapter executable to use. Overrides any user or workspace settings."
+ },
+ "debugAdapterArgs": {
+ "type": "array",
+ "items": {
+ "type": "string"
},
- {
- "type": "array",
- "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.",
- "items": {
+ "markdownDescription": "The list of additional arguments used to launch the debug adapter executable. Overrides any user or workspace settings."
+ },
+ "program": {
+ "type": "string",
+ "description": "Path to the program to attach to."
+ },
+ "pid": {
+ "type": [
+ "number",
+ "string"
+ ],
+ "description": "System process ID to attach to."
+ },
+ "waitFor": {
+ "type": "boolean",
+ "description": "If set to true, then wait for the process to launch by looking for a process with a basename that matches `program`. No process ID needs to be specified when using this flag.",
+ "default": true
+ },
+ "sourcePath": {
+ "type": "string",
+ "description": "Specify a source path to remap \"./\" to allow full paths to be used when setting breakpoints in binaries that have relative source paths."
+ },
+ "sourceMap": {
+ "anyOf": [
+ {
+ "type": "object",
+ "description": "Specify an object of path remappings; each entry has a key containing the source path and a value containing the destination path. E.g `{ \"/the/source/path\": \"/the/destination/path\" }`. Overrides sourcePath.",
+ "patternProperties": {
+ ".*": {
+ "type": "string"
+ }
+ },
+ "default": {}
+ },
+ {
"type": "array",
- "minItems": 2,
- "maxItems": 2,
+ "description": "Specify an array of path remappings; each element must itself be a two element array containing a source and destination path name. Overrides sourcePath.",
"items": {
- "type": "string"
- }
- },
- "default": []
- }
- ]
- },
- "debuggerRoot": {
- "type": "string",
- "description": "Specify a working directory to set the debug adapter to so relative object files can be located."
- },
- "targetTriple": {
- "type": "string",
- "description": "Triplet of the target architecture to override value derived from the program file."
- },
- "platformName": {
- "type": "string",
- "description": "Name of the execution platform to override value derived from the program file."
- },
- "attachCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "type": "array",
+ "minItems": 2,
+ "maxItems": 2,
+ "items": {
+ "type": "string"
+ }
+ },
+ "default": []
+ }
+ ]
},
- "description": "Custom commands that are executed instead of attaching to a process ID or to a process by name. These commands may optionally create a new target and must perform an attach. A valid process must exist after these commands complete or the \"attach\" will fail.",
- "default": []
- },
- "initCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "debuggerRoot": {
+ "type": "string",
+ "description": "Specify a working directory to set the debug adapter to so relative object files can be located."
},
- "description": "Initialization commands executed upon debugger startup.",
- "default": []
- },
- "preRunCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "targetTriple": {
+ "type": "string",
+ "description": "Triplet of the target architecture to override value derived from the program file."
},
- "description": "Commands executed just before the program is attached to.",
- "default": []
- },
- "postRunCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "platformName": {
+ "type": "string",
+ "description": "Name of the execution platform to override value derived from the program file."
},
- "description": "Commands executed just as soon as the program is successfully attached when it's in a stopped state prior to any automatic continuation.",
- "default": []
- },
- "stopCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "attachCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Custom commands that are executed instead of attaching to a process ID or to a process by name. These commands may optionally create a new target and must perform an attach. A valid process must exist after these commands complete or the \"attach\" will fail.",
+ "default": []
},
- "description": "Commands executed each time the program stops.",
- "default": []
- },
- "exitCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "initCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Initialization commands executed upon debugger startup.",
+ "default": []
},
- "description": "Commands executed when the program exits.",
- "default": []
- },
- "terminateCommands": {
- "type": "array",
- "items": {
- "type": "string"
+ "preRunCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed just before the program is attached to.",
+ "default": []
},
- "description": "Commands executed when the debugging session ends.",
- "default": []
- },
- "coreFile": {
- "type": "string",
- "description": "Path to the core file to debug."
- },
- "timeout": {
- "type": "number",
- "description": "The time in seconds to wait for a program to stop when attaching using \"attachCommands\". Defaults to 30 seconds."
- },
- "gdb-remote-port": {
- "type": [
- "number",
- "string"
- ],
- "description": "TCP/IP port to attach to a remote system. Specifying both pid and port is an error."
- },
- "gdb-remote-hostname": {
- "type": "string",
- "description": "The hostname to connect to a remote system. The default hostname being used localhost."
- },
- "enableAutoVariableSummaries": {
- "type": "boolean",
- "description": "Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables.",
- "default": false
- },
- "displayExtendedBacktrace": {
- "type": "boolean",
- "description": "Enable language specific extended backtraces.",
- "default": false
- },
- "enableSyntheticChildDebugging": {
- "type": "boolean",
- "description": "If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable.",
- "default": false
- },
- "commandEscapePrefix": {
- "type": "string",
- "description": "The escape prefix character to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a back-tick (`). If empty, then all expression in the Debug Console are treated as regular LLDB commands.",
- "default": "`"
- },
- "customFrameFormat": {
- "type": "string",
- "description": "If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
- "default": ""
- },
- "customThreadFormat": {
- "type": "string",
- "description": "If non-empty, threads will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for threads. If the format string contains errors, an error message will be displayed on the Debug Console and the default thread names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
- "default": ""
+ "postRunCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed just as soon as the program is successfully attached when it's in a stopped state prior to any automatic continuation.",
+ "default": []
+ },
+ "stopCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed each time the program stops.",
+ "default": []
+ },
+ "exitCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed when the program exits.",
+ "default": []
+ },
+ "terminateCommands": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "Commands executed when the debugging session ends.",
+ "default": []
+ },
+ "coreFile": {
+ "type": "string",
+ "description": "Path to the core file to debug."
+ },
+ "timeout": {
+ "type": "number",
+ "description": "The time in seconds to wait for a program to stop when attaching using \"attachCommands\". Defaults to 30 seconds."
+ },
+ "gdb-remote-port": {
+ "type": [
+ "number",
+ "string"
+ ],
+ "description": "TCP/IP port to attach to a remote system. Specifying both pid and port is an error."
+ },
+ "gdb-remote-hostname": {
+ "type": "string",
+ "description": "The hostname to connect to a remote system. The default hostname being used localhost."
+ },
+ "enableAutoVariableSummaries": {
+ "type": "boolean",
+ "description": "Enable auto generated summaries for variables when no summaries exist for a given type. This feature can cause performance delays in large projects when viewing variables.",
+ "default": false
+ },
+ "displayExtendedBacktrace": {
+ "type": "boolean",
+ "description": "Enable language specific extended backtraces.",
+ "default": false
+ },
+ "enableSyntheticChildDebugging": {
+ "type": "boolean",
+ "description": "If a variable is displayed using a synthetic children, also display the actual contents of the variable at the end under a [raw] entry. This is useful when creating sythetic child plug-ins as it lets you see the actual contents of the variable.",
+ "default": false
+ },
+ "commandEscapePrefix": {
+ "type": "string",
+ "description": "The escape prefix character to use for executing regular LLDB commands in the Debug Console, instead of printing variables. Defaults to a back-tick (`). If empty, then all expression in the Debug Console are treated as regular LLDB commands.",
+ "default": "`"
+ },
+ "customFrameFormat": {
+ "type": "string",
+ "description": "If non-empty, stack frames will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for frames. If the format string contains errors, an error message will be displayed on the Debug Console and the default frame names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
+ "default": ""
+ },
+ "customThreadFormat": {
+ "type": "string",
+ "description": "If non-empty, threads will have descriptions generated based on the provided format. See https://lldb.llvm.org/use/formatting.html for an explanation on format strings for threads. If the format string contains errors, an error message will be displayed on the Debug Console and the default thread names will be used. This might come with a performance cost because debug information might need to be processed to generate the description.",
+ "default": ""
+ }
}
}
- }
- },
- "initialConfigurations": [
- {
- "type": "lldb-dap",
- "request": "launch",
- "name": "Debug",
- "program": "${workspaceRoot}/<your program>",
- "args": [],
- "env": [],
- "cwd": "${workspaceRoot}"
- }
- ],
- "configurationSnippets": [
- {
- "label": "LLDB: Launch",
- "description": "",
- "body": {
+ },
+ "initialConfigurations": [
+ {
"type": "lldb-dap",
"request": "launch",
- "name": "${2:Launch}",
- "program": "^\"\\${workspaceRoot}/${1:<your program>}\"",
+ "name": "Debug",
+ "program": "${workspaceRoot}/<your program>",
"args": [],
"env": [],
- "cwd": "^\"\\${workspaceRoot}\""
- }
- },
- {
- "label": "LLDB: Attach",
- "description": "",
- "body": {
- "type": "lldb-dap",
- "request": "attach",
- "name": "${2:Attach}",
- "program": "${1:<your program>}",
- "waitFor": true
+ "cwd": "${workspaceRoot}"
}
- },
- {
- "label": "LLDB: Load Coredump",
- "description": "",
- "body": {
- "type": "lldb-dap",
- "request": "attach",
- "name": "${2:Core}",
- "program": "${1:<your program>}",
- "coreFile": "${1:<your program>}.core"
+ ],
+ "configurationSnippets": [
+ {
+ "label": "LLDB: Launch",
+ "description": "",
+ "body": {
+ "type": "lldb-dap",
+ "request": "launch",
+ "name": "${2:Launch}",
+ "program": "^\"\\${workspaceRoot}/${1:<your program>}\"",
+ "args": [],
+ "env": [],
+ "cwd": "^\"\\${workspaceRoot}\""
+ }
+ },
+ {
+ "label": "LLDB: Attach",
+ "description": "",
+ "body": {
+ "type": "lldb-dap",
+ "request": "attach",
+ "name": "${2:Attach}",
+ "program": "${1:<your program>}",
+ "waitFor": true
+ }
+ },
+ {
+ "label": "LLDB: Load Coredump",
+ "description": "",
+ "body": {
+ "type": "lldb-dap",
+ "request": "attach",
+ "name": "${2:Core}",
+ "program": "${1:<your program>}",
+ "coreFile": "${1:<your program>}.core"
+ }
}
- }
- ]
- }
- ]
-}
\ No newline at end of file
+ ]
+ }
+ ]
+ }
+}
>From 8810595068a3f17c444e7f96733a6cd9dc08987e Mon Sep 17 00:00:00 2001
From: Devon Loehr <DKLoehr at users.noreply.github.com>
Date: Wed, 7 May 2025 14:10:25 -0400
Subject: [PATCH 026/115] Add unnecessary-virtual-specifier to -Wextra
(#138741)
Effectively a reland of #133265, though due to discussion there we add
the warning to -Wextra instead of turning it on by default. We still
need to disable it for LLVM due to our unusual policy of using virtual
`anchor` functions even in final classes. We now check if the warning
exists before disabling it in LLVM builds, so hopefully this will fix
the issues libcxx ran into last time.
From the previous PR:
I've been working on cleaning up this warning in two codebases: LLVM and
chromium (plus its dependencies). The chromium + dependency cleanup has
been straightforward. Git archaeology shows that there are two reasons
for the warnings: classes to which `final` was added after they were
initially committed, and classes with virtual destructors that nobody
remarks on. Presumably the latter case is because people are just very
used to destructors being virtual.
The LLVM cleanup was more surprising: I discovered that we have an [old
policy](https://llvm.org/docs/CodingStandards.html#provide-a-virtual-method-anchor-for-classes-in-headers)
about including out-of-line virtual functions in every class with a
vtable, even `final` ones. This means our codebase has many virtual
"anchor" functions which do nothing except control where the vtable is
emitted, and which trigger the warning. I looked into alternatives to
satisfy the policy, such as using destructors instead of introducing a
new function, but it wasn't clear if they had larger implications.
Overall, it seems like the warning is genuinely useful in most codebases
(evidenced by chromium and its dependencies), and LLVM is an unusual
case. Therefore we should enable the warning by default, and turn it off
only for LLVM builds.
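To make the interaction concrete, here is a minimal C++ sketch (hypothetical
names, not code from this patch) of what -Wunnecessary-virtual-specifier flags
once it is part of -Wextra, and what it leaves alone:

  // clang++ -std=c++17 -Wextra -c example.cpp
  struct Base {
    virtual ~Base() = default;
    virtual void run();
  };

  struct Leaf final : Base {
    void run() override;    // fine: overrides, no redundant 'virtual'
    virtual void anchor();  // warns: a new virtual method in a 'final' class
                            // can never be overridden (the LLVM "anchor" pattern)
  };

The HandleLLVMOptions.cmake change below adds -Wno-unnecessary-virtual-specifier
when the flag is supported, so LLVM's own anchor functions keep building
warning-free.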
---
clang/docs/ReleaseNotes.rst | 6 +++---
clang/include/clang/Basic/DiagnosticGroups.td | 8 ++++----
llvm/cmake/modules/HandleLLVMOptions.cmake | 5 +++++
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 350244e3054cf..89d7f137d0fe0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -434,9 +434,9 @@ Improvements to Clang's diagnostics
- The ``-Wsign-compare`` warning now treats expressions with bitwise not(~) and minus(-) as signed integers
except for the case where the operand is an unsigned integer
and throws warning if they are compared with unsigned integers (##18878).
-- The ``-Wunnecessary-virtual-specifier`` warning has been added to warn about
- methods which are marked as virtual inside a ``final`` class, and hence can
- never be overridden.
+- The ``-Wunnecessary-virtual-specifier`` warning (included in ``-Wextra``) has
+ been added to warn about methods which are marked as virtual inside a
+ ``final`` class, and hence can never be overridden.
- Improve the diagnostics for chained comparisons to report actual expressions and operators (#GH129069).
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 76092b84b46ff..7b0dcde44296e 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -421,13 +421,12 @@ def CXX11WarnSuggestOverride : DiagGroup<"suggest-override">;
def WarnUnnecessaryVirtualSpecifier : DiagGroup<"unnecessary-virtual-specifier"> {
code Documentation = [{
Warns when a ``final`` class contains a virtual method (including virtual
-destructors). Since ``final`` classes cannot be subclassed, their methods
-cannot be overridden, and hence the ``virtual`` specifier is useless.
+destructors) that does not override anything. Since ``final`` classes cannot
+be subclassed, their methods cannot be overridden, so there is no point to
+introducing new ``virtual`` methods.
The warning also detects virtual methods in classes whose destructor is
``final``, for the same reason.
-
-The warning does not fire on virtual methods which are also marked ``override``.
}];
}
@@ -1164,6 +1163,7 @@ def Extra : DiagGroup<"extra", [
FUseLdPath,
CastFunctionTypeMismatch,
InitStringTooLongMissingNonString,
+ WarnUnnecessaryVirtualSpecifier,
]>;
def Most : DiagGroup<"most", [
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 8b3303fe9f3d2..c427a65ee030c 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -882,6 +882,11 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
# The LLVM libraries have no stable C++ API, so -Wnoexcept-type is not useful.
append("-Wno-noexcept-type" CMAKE_CXX_FLAGS)
+ # LLVM has a policy of including virtual "anchor" functions to control
+ # where the vtable is emitted. In `final` classes, these are exactly what
+ # this warning detects: unnecessary virtual methods.
+ add_flag_if_supported("-Wno-unnecessary-virtual-specifier" CXX_SUPPORTS_UNNECESSARY_VIRTUAL_FLAG)
+
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
append("-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
endif()
>From 47218eadd8adf1926ced879caa50b8885d1b070d Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 11:14:28 -0700
Subject: [PATCH 027/115] [lldb] Use -Wno-documentation-deprecated-sync if
available (#138909)
report_fatal_error has been marked "@deprecated" in its comment, but
the function itself is not marked with [[deprecated]] yet. This
causes warnings like:
llvm/include/llvm/Support/ErrorHandling.h:61:6: error: declaration
is marked with '@deprecated' command but does not have a deprecation
attribute [-Werror,-Wdocumentation-deprecated-sync]
llvm/include/llvm/Support/Error.h:738:6: error: declaration is
marked with '@deprecated' command but does not have a deprecation
attribute [-Werror,-Wdocumentation-deprecated-sync]
This patch disables the warning while we migrate away from
report_fatal_error.
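For context, a stripped-down illustration of the mismatch the warning complains
about (illustrative only, not the actual LLVM declaration):

  /// Reports a fatal error.
  /// @deprecated Callers should migrate to a recoverable error path.
  void report_fatal_error(const char *Reason);
  // With -Wdocumentation enabled (as the APITests target does), clang reports:
  // declaration is marked with '@deprecated' command but does not have a
  // deprecation attribute [-Wdocumentation-deprecated-sync]

Adding [[deprecated]] to the declaration would also silence it, but since the
migration is still in progress the test target opts out of the warning instead.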
---
lldb/unittests/API/CMakeLists.txt | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/lldb/unittests/API/CMakeLists.txt b/lldb/unittests/API/CMakeLists.txt
index 8bdc806878239..06ac49244176c 100644
--- a/lldb/unittests/API/CMakeLists.txt
+++ b/lldb/unittests/API/CMakeLists.txt
@@ -16,6 +16,17 @@ if (CXX_SUPPORTS_DOCUMENTATION)
PRIVATE -Wdocumentation)
endif()
+# Apply -Wno-documentation-deprecated-sync while we migrate away from
+# report_fatal_error in llvm/include/llvm/Support/ErrorHandling.h
+# and llvm/include/llvm/Support/Error.h.
+# Remove this block of code when the migration is complete.
+# See https://github.com/llvm/llvm-project/issues/138914.
+check_cxx_compiler_flag("-Wno-documentation-deprecated-sync"
+ CXX_SUPPORTS_NO_DOCUMENTATION_DEPRECATED_SYNC)
+if (CXX_SUPPORTS_NO_DOCUMENTATION_DEPRECATED_SYNC)
+ target_compile_options(APITests
+ PRIVATE -Wno-documentation-deprecated-sync)
+endif()
if(Python3_RPATH)
set_property(TARGET APITests APPEND PROPERTY BUILD_RPATH "${Python3_RPATH}")
>From 8307d45cc855734650d9fff6778461687a40342b Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc at chromium.org>
Date: Wed, 7 May 2025 11:17:19 -0700
Subject: [PATCH 028/115] [lld][WebAssembly] Fix reported names of LTO output
files (#138789)
This change was made in the ELF linker in #78835 but somehow never made
it over to the wasm port.
---
lld/test/wasm/lto/signature-mismatch.ll | 2 +-
lld/wasm/LTO.cpp | 25 +++++++++++++++----------
lld/wasm/LTO.h | 3 ++-
lld/wasm/SymbolTable.cpp | 4 ++--
4 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/lld/test/wasm/lto/signature-mismatch.ll b/lld/test/wasm/lto/signature-mismatch.ll
index cf1a998826fc0..6580c8cf71b33 100644
--- a/lld/test/wasm/lto/signature-mismatch.ll
+++ b/lld/test/wasm/lto/signature-mismatch.ll
@@ -17,4 +17,4 @@ define void @_start() {
; CHECK: error: function signature mismatch: f
; CHECK: >>> defined as (i32) -> void in {{.*}}signature-mismatch.ll.tmp1.o
-; CHECK: >>> defined as () -> void in lto.tmp
+; CHECK: >>> defined as () -> void in {{.*}}signature-mismatch.ll.tmp.wasm.lto.o
diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp
index ab63281012eae..a877f067603e5 100644
--- a/lld/wasm/LTO.cpp
+++ b/lld/wasm/LTO.cpp
@@ -183,10 +183,11 @@ static void thinLTOCreateEmptyIndexFiles() {
// Merge all the bitcode files we have seen, codegen the result
// and return the resulting objects.
-std::vector<StringRef> BitcodeCompiler::compile() {
+SmallVector<InputFile *, 0> BitcodeCompiler::compile() {
unsigned maxTasks = ltoObj->getMaxTasks();
buf.resize(maxTasks);
files.resize(maxTasks);
+ filenames.resize(maxTasks);
// The --thinlto-cache-dir option specifies the path to a directory in which
// to cache native object files for ThinLTO incremental builds. If a path was
@@ -233,15 +234,21 @@ std::vector<StringRef> BitcodeCompiler::compile() {
if (!ctx.arg.thinLTOCacheDir.empty())
pruneCache(ctx.arg.thinLTOCacheDir, ctx.arg.thinLTOCachePolicy, files);
- std::vector<StringRef> ret;
+ SmallVector<InputFile *, 0> ret;
for (unsigned i = 0; i != maxTasks; ++i) {
StringRef objBuf = buf[i].second;
StringRef bitcodeFilePath = buf[i].first;
+ if (files[i]) {
+ // When files[i] is not null, we get the native relocatable file from the
+ // cache. filenames[i] contains the original BitcodeFile's identifier.
+ objBuf = files[i]->getBuffer();
+ bitcodeFilePath = filenames[i];
+ } else {
+ objBuf = buf[i].second;
+ bitcodeFilePath = buf[i].first;
+ }
if (objBuf.empty())
continue;
- ret.emplace_back(objBuf.data(), objBuf.size());
- if (!ctx.arg.saveTemps)
- continue;
// If the input bitcode file is path/to/x.o and -o specifies a.out, the
// corresponding native relocatable file path will look like:
@@ -266,7 +273,9 @@ std::vector<StringRef> BitcodeCompiler::compile() {
sys::path::remove_dots(path, true);
ltoObjName = saver().save(path.str());
}
- saveBuffer(objBuf, ltoObjName);
+ if (ctx.arg.saveTemps)
+ saveBuffer(objBuf, ltoObjName);
+ ret.emplace_back(createObjectFile(MemoryBufferRef(objBuf, ltoObjName)));
}
if (!ctx.arg.ltoObjPath.empty()) {
@@ -275,10 +284,6 @@ std::vector<StringRef> BitcodeCompiler::compile() {
saveBuffer(buf[i].second, ctx.arg.ltoObjPath + Twine(i));
}
- for (std::unique_ptr<MemoryBuffer> &file : files)
- if (file)
- ret.push_back(file->getBuffer());
-
return ret;
}
diff --git a/lld/wasm/LTO.h b/lld/wasm/LTO.h
index 43c7672fb5639..21b1d59024663 100644
--- a/lld/wasm/LTO.h
+++ b/lld/wasm/LTO.h
@@ -45,13 +45,14 @@ class BitcodeCompiler {
~BitcodeCompiler();
void add(BitcodeFile &f);
- std::vector<StringRef> compile();
+ SmallVector<InputFile *, 0> compile();
private:
std::unique_ptr<llvm::lto::LTO> ltoObj;
// An array of (module name, native relocatable file content) pairs.
SmallVector<std::pair<std::string, SmallString<0>>, 0> buf;
std::vector<std::unique_ptr<MemoryBuffer>> files;
+ SmallVector<std::string, 0> filenames;
std::unique_ptr<llvm::raw_fd_ostream> indexFile;
llvm::DenseSet<StringRef> thinIndices;
};
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index bbe48b03f77e5..91677b34ea2ca 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -87,8 +87,8 @@ void SymbolTable::compileBitcodeFiles() {
for (BitcodeFile *f : ctx.bitcodeFiles)
lto->add(*f);
- for (StringRef filename : lto->compile()) {
- auto *obj = make<ObjFile>(MemoryBufferRef(filename, "lto.tmp"), "");
+ for (auto &file : lto->compile()) {
+ auto *obj = cast<ObjFile>(file);
obj->parse(true);
ctx.objectFiles.push_back(obj);
}
>From 05d67348980a32a0e8c53dcf4d70321957e9f895 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 7 May 2025 13:23:24 -0500
Subject: [PATCH 029/115] [Clang] Pass correct lane mask for match helpers
(#138693)
Summary:
We use the ballot to get the proper lane mask after we've masked off the
threads that are already done. This isn't an issue on AMDGPU, but it could
cause problems on post-Volta NVIDIA targets, since the original mask claims
that threads are active when they aren't.
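A small CPU-side thought experiment (plain C++, no GPU intrinsics, purely
illustrative) shows why the first-lane read has to use the ballot result
rather than the original lane mask:

  #include <cassert>
  #include <cstdint>

  // Model a wave as a bitmask of lanes; "first lane" is the lowest set bit.
  static int first_lane(uint64_t mask) { return __builtin_ctzll(mask); }

  int main() {
    uint64_t lane_mask = 0b1111; // four lanes entered the match loop
    uint64_t done      = 0b0011; // lanes 0 and 1 already found their group
    uint64_t active    = lane_mask & ~done; // what the ballot of !done reports
    // The stale mask still claims lanes 0 and 1 participate; the ballot
    // result reflects which lanes are really active.
    assert(first_lane(lane_mask) == 0); // would pick a finished lane
    assert(first_lane(active) == 2);    // picks a lane still searching
    return 0;
  }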
---
clang/lib/Headers/gpuintrin.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index d308cc959be84..7afc82413996b 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -264,9 +264,10 @@ __gpu_match_any_u32_impl(uint64_t __lane_mask, uint32_t __x) {
uint64_t __match_mask = 0;
bool __done = 0;
- while (__gpu_ballot(__lane_mask, !__done)) {
+ for (uint64_t __active_mask = __lane_mask; __active_mask;
+ __active_mask = __gpu_ballot(__lane_mask, !__done)) {
if (!__done) {
- uint32_t __first = __gpu_read_first_lane_u32(__lane_mask, __x);
+ uint32_t __first = __gpu_read_first_lane_u32(__active_mask, __x);
if (__first == __x) {
__match_mask = __gpu_lane_mask();
__done = 1;
@@ -283,9 +284,10 @@ __gpu_match_any_u64_impl(uint64_t __lane_mask, uint64_t __x) {
uint64_t __match_mask = 0;
bool __done = 0;
- while (__gpu_ballot(__lane_mask, !__done)) {
+ for (uint64_t __active_mask = __lane_mask; __active_mask;
+ __active_mask = __gpu_ballot(__lane_mask, !__done)) {
if (!__done) {
- uint64_t __first = __gpu_read_first_lane_u64(__lane_mask, __x);
+ uint64_t __first = __gpu_read_first_lane_u64(__active_mask, __x);
if (__first == __x) {
__match_mask = __gpu_lane_mask();
__done = 1;
>From 70cf616b331c9db65143490f6958752b6520e758 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Wed, 7 May 2025 11:28:57 -0700
Subject: [PATCH 030/115] [Fuchsia] Support PGO (#128680)
Enable 2-stage builds with PGO.
---
.../caches/Fuchsia-stage2-instrumented.cmake | 45 ++++++++
clang/cmake/caches/Fuchsia.cmake | 109 +++++++++++++-----
2 files changed, 123 insertions(+), 31 deletions(-)
create mode 100644 clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
diff --git a/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake b/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
new file mode 100644
index 0000000000000..ecd478aefdaee
--- /dev/null
+++ b/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
@@ -0,0 +1,45 @@
+# This file sets up a CMakeCache for the second stage of a Fuchsia toolchain build.
+
+include(${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2.cmake)
+
+if(NOT APPLE)
+ set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "")
+endif()
+
+set(CLANG_BOOTSTRAP_TARGETS
+ check-all
+ check-clang
+ check-lld
+ check-llvm
+ check-polly
+ clang
+ clang-test-depends
+ toolchain-distribution
+ install-toolchain-distribution
+ install-toolchain-distribution-stripped
+ install-toolchain-distribution-toolchain
+ lld-test-depends
+ llvm-config
+ llvm-test-depends
+ test-depends
+ test-suite CACHE STRING "")
+
+get_cmake_property(variableNames VARIABLES)
+foreach(variableName ${variableNames})
+ if(variableName MATCHES "^STAGE2_")
+ string(REPLACE "STAGE2_" "" new_name ${variableName})
+ list(APPEND EXTRA_ARGS "-D${new_name}=${${variableName}}")
+ endif()
+endforeach()
+
+set(CLANG_PGO_TRAINING_DEPS
+ builtins
+ runtimes
+ CACHE STRING "")
+
+# Setup the bootstrap build.
+set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
+set(CLANG_BOOTSTRAP_CMAKE_ARGS
+ ${EXTRA_ARGS}
+ -C ${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2.cmake
+ CACHE STRING "")
diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake
index 83336589da305..ee1d681d9f6df 100644
--- a/clang/cmake/caches/Fuchsia.cmake
+++ b/clang/cmake/caches/Fuchsia.cmake
@@ -126,6 +126,16 @@ else()
set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
set(LIBCXX_HARDENING_MODE "none" CACHE STRING "")
set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+ set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
+ set(COMPILER_RT_BUILD_PROFILE ON CACHE BOOL "")
+ set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
+ set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
+ set(COMPILER_RT_USE_BUILTINS_LIBRARY ON CACHE BOOL "")
+ set(COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "")
+ set(SANITIZER_CXX_ABI "libc++" CACHE STRING "")
+ set(SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "")
+ set(SANITIZER_TEST_CXX "libc++" CACHE STRING "")
+ set(SANITIZER_TEST_CXX_INTREE ON CACHE BOOL "")
set(LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
set(RUNTIMES_CMAKE_ARGS "-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13;-DCMAKE_OSX_ARCHITECTURES=arm64|x86_64" CACHE STRING "")
endif()
@@ -165,33 +175,59 @@ endif()
set(BOOTSTRAP_LLVM_ENABLE_LLD ON CACHE BOOL "")
set(BOOTSTRAP_LLVM_ENABLE_LTO ON CACHE BOOL "")
-set(_FUCHSIA_BOOTSTRAP_TARGETS
- check-all
- check-clang
- check-lld
- check-llvm
- check-polly
- llvm-config
- clang-test-depends
- lld-test-depends
- llvm-test-depends
- test-suite
- test-depends
- toolchain-distribution
- install-toolchain-distribution
- install-toolchain-distribution-stripped
- install-toolchain-distribution-toolchain
- clang)
-
-if(FUCHSIA_ENABLE_LLDB)
- list(APPEND _FUCHSIA_ENABLE_PROJECTS lldb)
- list(APPEND _FUCHSIA_BOOTSTRAP_TARGETS
- check-lldb
- lldb-test-depends
- debugger-distribution
- install-debugger-distribution
- install-debugger-distribution-stripped
- install-debugger-distribution-toolchain)
+if(FUCHSIA_ENABLE_PGO)
+ set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED ON CACHE BOOL "")
+
+ set(_FUCHSIA_BOOTSTRAP_TARGETS
+ generate-profdata
+ stage2
+ stage2-toolchain-distribution
+ stage2-install-toolchain-distribution
+ stage2-install-toolchain-distribution-stripped
+ stage2-install-toolchain-distribution-toolchain
+ stage2-check-all
+ stage2-check-clang
+ stage2-check-lld
+ stage2-check-llvm
+ stage2-check-polly
+ stage2-test-suite)
+ if(FUCHSIA_ENABLE_LLDB)
+ list(APPEND _FUCHSIA_ENABLE_PROJECTS lldb)
+ list(APPEND _FUCHSIA_BOOTSTRAP_TARGETS
+ stage2-check-lldb
+ stage2-debugger-distribution
+ stage2-install-debugger-distribution
+ stage2-install-debugger-distribution-stripped
+ stage2-install-debugger-distribution-toolchain)
+ endif()
+else()
+ set(_FUCHSIA_BOOTSTRAP_TARGETS
+ check-all
+ check-clang
+ check-lld
+ check-llvm
+ check-polly
+ llvm-config
+ clang
+ clang-test-depends
+ lld-test-depends
+ llvm-test-depends
+ test-suite
+ test-depends
+ toolchain-distribution
+ install-toolchain-distribution
+ install-toolchain-distribution-stripped
+ install-toolchain-distribution-toolchain)
+ if(FUCHSIA_ENABLE_LLDB)
+ list(APPEND _FUCHSIA_ENABLE_PROJECTS lldb)
+ list(APPEND _FUCHSIA_BOOTSTRAP_TARGETS
+ check-lldb
+ lldb-test-depends
+ debugger-distribution
+ install-debugger-distribution
+ install-debugger-distribution-stripped
+ install-debugger-distribution-toolchain)
+ endif()
endif()
set(LLVM_ENABLE_PROJECTS ${_FUCHSIA_ENABLE_PROJECTS} CACHE STRING "")
@@ -200,6 +236,7 @@ set(CLANG_BOOTSTRAP_TARGETS ${_FUCHSIA_BOOTSTRAP_TARGETS} CACHE STRING "")
get_cmake_property(variableNames VARIABLES)
foreach(variableName ${variableNames})
if(variableName MATCHES "^STAGE2_")
+ list(APPEND EXTRA_ARGS "-D${variableName}=${${variableName}}")
string(REPLACE "STAGE2_" "" new_name ${variableName})
string(REPLACE ";" "|" value "${${variableName}}")
list(APPEND EXTRA_ARGS "-D${new_name}=${value}")
@@ -209,13 +246,23 @@ endforeach()
# TODO: This is a temporary workaround until we figure out the right solution.
set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt;libcxx;libcxxabi;libunwind" CACHE STRING "")
+set(LLVM_BUILTIN_TARGETS "default" CACHE STRING "")
+set(LLVM_RUNTIME_TARGETS "default" CACHE STRING "")
+
# Setup the bootstrap build.
set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "")
set(CLANG_BOOTSTRAP_EXTRA_DEPS
builtins
runtimes
CACHE STRING "")
-set(CLANG_BOOTSTRAP_CMAKE_ARGS
- ${EXTRA_ARGS}
- -C ${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2.cmake
- CACHE STRING "")
+if(FUCHSIA_ENABLE_PGO)
+ set(CLANG_BOOTSTRAP_CMAKE_ARGS
+ ${EXTRA_ARGS}
+ -C ${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2-instrumented.cmake
+ CACHE STRING "")
+else()
+ set(CLANG_BOOTSTRAP_CMAKE_ARGS
+ ${EXTRA_ARGS}
+ -C ${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2.cmake
+ CACHE STRING "")
+endif()
>From 854b9e931703dd1b9d8a2b0fe8da787f9e26058d Mon Sep 17 00:00:00 2001
From: John Harrison <harjohn at google.com>
Date: Wed, 7 May 2025 11:49:53 -0700
Subject: [PATCH 031/115] [lldb-dap] Format extension typescript. (#138925)
I think the format checker isn't checking typescript files. I ran `npm
run format` to fix the extension typescript.
---
.../src-ts/debug-configuration-provider.ts | 96 ++++++----
.../lldb-dap/src-ts/uri-launch-handler.ts | 168 ++++++++++--------
2 files changed, 153 insertions(+), 111 deletions(-)
diff --git a/lldb/tools/lldb-dap/src-ts/debug-configuration-provider.ts b/lldb/tools/lldb-dap/src-ts/debug-configuration-provider.ts
index 8a4089008b2f9..c91b101f4a9ba 100644
--- a/lldb/tools/lldb-dap/src-ts/debug-configuration-provider.ts
+++ b/lldb/tools/lldb-dap/src-ts/debug-configuration-provider.ts
@@ -21,79 +21,97 @@ async function isServerModeSupported(exe: string): Promise<boolean> {
}
interface BoolConfig {
- type: 'boolean';
+ type: "boolean";
default: boolean;
}
interface StringConfig {
- type: 'string';
+ type: "string";
default: string;
}
interface NumberConfig {
- type: 'number';
+ type: "number";
default: number;
}
interface StringArrayConfig {
- type: 'stringArray';
+ type: "stringArray";
default: string[];
}
-type DefaultConfig = BoolConfig | NumberConfig | StringConfig | StringArrayConfig;
+type DefaultConfig =
+ | BoolConfig
+ | NumberConfig
+ | StringConfig
+ | StringArrayConfig;
const configurations: Record<string, DefaultConfig> = {
// Keys for debugger configurations.
- "commandEscapePrefix": { type: "string", default: "`" },
- "customFrameFormat": { type: "string", default: "" },
- "customThreadFormat": { type: "string", default: "" },
- "detachOnError": { type: "boolean", default: false },
- "disableASLR": { type: "boolean", default: true },
- "disableSTDIO": { type: "boolean", default: false },
- "displayExtendedBacktrace": { type: "boolean", default: false },
- "enableAutoVariableSummaries": { type: "boolean", default: false },
- "enableSyntheticChildDebugging": { type: "boolean", default: false },
- "timeout": { type: "number", default: 30 },
+ commandEscapePrefix: { type: "string", default: "`" },
+ customFrameFormat: { type: "string", default: "" },
+ customThreadFormat: { type: "string", default: "" },
+ detachOnError: { type: "boolean", default: false },
+ disableASLR: { type: "boolean", default: true },
+ disableSTDIO: { type: "boolean", default: false },
+ displayExtendedBacktrace: { type: "boolean", default: false },
+ enableAutoVariableSummaries: { type: "boolean", default: false },
+ enableSyntheticChildDebugging: { type: "boolean", default: false },
+ timeout: { type: "number", default: 30 },
// Keys for platform / target configuration.
- "platformName": { type: "string", default: "" },
- "targetTriple": { type: "string", default: "" },
+ platformName: { type: "string", default: "" },
+ targetTriple: { type: "string", default: "" },
// Keys for debugger command hooks.
- "initCommands": { type: "stringArray", default: [] },
- "preRunCommands": { type: "stringArray", default: [] },
- "postRunCommands": { type: "stringArray", default: [] },
- "stopCommands": { type: "stringArray", default: [] },
- "exitCommands": { type: "stringArray", default: [] },
- "terminateCommands": { type: "stringArray", default: [] },
+ initCommands: { type: "stringArray", default: [] },
+ preRunCommands: { type: "stringArray", default: [] },
+ postRunCommands: { type: "stringArray", default: [] },
+ stopCommands: { type: "stringArray", default: [] },
+ exitCommands: { type: "stringArray", default: [] },
+ terminateCommands: { type: "stringArray", default: [] },
};
export class LLDBDapConfigurationProvider
- implements vscode.DebugConfigurationProvider {
- constructor(private readonly server: LLDBDapServer) { }
+ implements vscode.DebugConfigurationProvider
+{
+ constructor(private readonly server: LLDBDapServer) {}
async resolveDebugConfiguration(
folder: vscode.WorkspaceFolder | undefined,
debugConfiguration: vscode.DebugConfiguration,
- token?: vscode.CancellationToken): Promise<vscode.DebugConfiguration> {
- let config = vscode.workspace.getConfiguration('lldb-dap.defaults');
+ token?: vscode.CancellationToken,
+ ): Promise<vscode.DebugConfiguration> {
+ let config = vscode.workspace.getConfiguration("lldb-dap.defaults");
for (const [key, cfg] of Object.entries(configurations)) {
- if (Reflect.has(debugConfiguration, key)) continue;
+ if (Reflect.has(debugConfiguration, key)) {
+ continue;
+ }
const value = config.get(key);
- if (value === cfg.default) continue;
+ if (!value || value === cfg.default) {
+ continue;
+ }
switch (cfg.type) {
- case 'string':
- if (typeof value !== 'string')
+ case "string":
+ if (typeof value !== "string") {
throw new Error(`Expected ${key} to be a string, got ${value}`);
+ }
break;
- case 'number':
- if (typeof value !== 'number')
+ case "number":
+ if (typeof value !== "number") {
throw new Error(`Expected ${key} to be a number, got ${value}`);
+ }
break;
- case 'boolean':
- if (typeof value !== 'boolean')
+ case "boolean":
+ if (typeof value !== "boolean") {
throw new Error(`Expected ${key} to be a boolean, got ${value}`);
+ }
break;
- case 'stringArray':
- if (typeof value !== 'object' && Array.isArray(value))
- throw new Error(`Expected ${key} to be a array of strings, got ${value}`);
- if ((value as string[]).length === 0) continue;
+ case "stringArray":
+ if (typeof value !== "object" && Array.isArray(value)) {
+ throw new Error(
+ `Expected ${key} to be a array of strings, got ${value}`,
+ );
+ }
+ if ((value as string[]).length === 0) {
+ continue;
+ }
break;
}
diff --git a/lldb/tools/lldb-dap/src-ts/uri-launch-handler.ts b/lldb/tools/lldb-dap/src-ts/uri-launch-handler.ts
index 0c3b1e9a00d9e..d45c1820eec75 100644
--- a/lldb/tools/lldb-dap/src-ts/uri-launch-handler.ts
+++ b/lldb/tools/lldb-dap/src-ts/uri-launch-handler.ts
@@ -1,78 +1,102 @@
import * as vscode from "vscode";
export class LaunchUriHandler implements vscode.UriHandler {
- async handleUri(uri: vscode.Uri) {
- try {
- const params = new URLSearchParams(uri.query);
- if (uri.path == '/start') {
- // Some properties have default values
- let debugConfig: vscode.DebugConfiguration = {
- type: 'lldb-dap',
- request: 'launch',
- name: '',
- };
- // The `config` parameter allows providing a complete JSON-encoded configuration
- const configJson = params.get("config");
- if (configJson !== null) {
- Object.assign(debugConfig, JSON.parse(configJson));
- }
- // Furthermore, some frequently used parameters can also be provided as separate parameters
- const stringKeys = ["name", "request", "program", "cwd", "debuggerRoot"];
- const numberKeys = ["pid"];
- const arrayKeys = [
- "args", "initCommands", "preRunCommands", "stopCommands", "exitCommands",
- "terminateCommands", "launchCommands", "attachCommands"
- ];
- for (const key of stringKeys) {
- const value = params.get(key);
- if (value) {
- debugConfig[key] = value;
- }
- }
- for (const key of numberKeys) {
- const value = params.get(key);
- if (value) {
- debugConfig[key] = Number(value);
- }
- }
- for (const key of arrayKeys) {
- // `getAll()` returns an array of strings.
- const value = params.getAll(key);
- if (value) {
- debugConfig[key] = value;
- }
- }
- // Report an error if we received any unknown parameters
- const supportedKeys = new Set<string>(["config"].concat(stringKeys).concat(numberKeys).concat(arrayKeys));
- const presentKeys = new Set<string>(params.keys());
- // FIXME: Use `Set.difference` as soon as ES2024 is widely available
- const unknownKeys = new Set<string>();
- for (const k of presentKeys.keys()) {
- if (!supportedKeys.has(k)) {
- unknownKeys.add(k);
- }
- }
- if (unknownKeys.size > 0) {
- throw new Error(`Unsupported URL parameters: ${Array.from(unknownKeys.keys()).join(", ")}`);
- }
- // Prodide a default for the config name
- const defaultName = debugConfig.request == 'launch' ? "URL-based Launch" : "URL-based Attach";
- debugConfig.name = debugConfig.name || debugConfig.program || defaultName;
- // Force the type to `lldb-dap`. We don't want to allow launching any other
- // Debug Adapters using this URI scheme.
- if (debugConfig.type != "lldb-dap") {
- throw new Error(`Unsupported debugger type: ${debugConfig.type}`);
- }
- await vscode.debug.startDebugging(undefined, debugConfig);
- } else {
- throw new Error(`Unsupported Uri path: ${uri.path}`);
- }
- } catch (err) {
- if (err instanceof Error) {
- await vscode.window.showErrorMessage(`Failed to handle lldb-dap URI request: ${err.message}`);
- } else {
- await vscode.window.showErrorMessage(`Failed to handle lldb-dap URI request: ${JSON.stringify(err)}`);
- }
+ async handleUri(uri: vscode.Uri) {
+ try {
+ const params = new URLSearchParams(uri.query);
+ if (uri.path == "/start") {
+ // Some properties have default values
+ let debugConfig: vscode.DebugConfiguration = {
+ type: "lldb-dap",
+ request: "launch",
+ name: "",
+ };
+ // The `config` parameter allows providing a complete JSON-encoded configuration
+ const configJson = params.get("config");
+ if (configJson !== null) {
+ Object.assign(debugConfig, JSON.parse(configJson));
}
+ // Furthermore, some frequently used parameters can also be provided as separate parameters
+ const stringKeys = [
+ "name",
+ "request",
+ "program",
+ "cwd",
+ "debuggerRoot",
+ ];
+ const numberKeys = ["pid"];
+ const arrayKeys = [
+ "args",
+ "initCommands",
+ "preRunCommands",
+ "stopCommands",
+ "exitCommands",
+ "terminateCommands",
+ "launchCommands",
+ "attachCommands",
+ ];
+ for (const key of stringKeys) {
+ const value = params.get(key);
+ if (value) {
+ debugConfig[key] = value;
+ }
+ }
+ for (const key of numberKeys) {
+ const value = params.get(key);
+ if (value) {
+ debugConfig[key] = Number(value);
+ }
+ }
+ for (const key of arrayKeys) {
+ // `getAll()` returns an array of strings.
+ const value = params.getAll(key);
+ if (value) {
+ debugConfig[key] = value;
+ }
+ }
+ // Report an error if we received any unknown parameters
+ const supportedKeys = new Set<string>(
+ ["config"].concat(stringKeys).concat(numberKeys).concat(arrayKeys),
+ );
+ const presentKeys = new Set<string>(params.keys());
+ // FIXME: Use `Set.difference` as soon as ES2024 is widely available
+ const unknownKeys = new Set<string>();
+ for (const k of presentKeys.keys()) {
+ if (!supportedKeys.has(k)) {
+ unknownKeys.add(k);
+ }
+ }
+ if (unknownKeys.size > 0) {
+ throw new Error(
+ `Unsupported URL parameters: ${Array.from(unknownKeys.keys()).join(", ")}`,
+ );
+ }
+ // Prodide a default for the config name
+ const defaultName =
+ debugConfig.request == "launch"
+ ? "URL-based Launch"
+ : "URL-based Attach";
+ debugConfig.name =
+ debugConfig.name || debugConfig.program || defaultName;
+ // Force the type to `lldb-dap`. We don't want to allow launching any other
+ // Debug Adapters using this URI scheme.
+ if (debugConfig.type != "lldb-dap") {
+ throw new Error(`Unsupported debugger type: ${debugConfig.type}`);
+ }
+ await vscode.debug.startDebugging(undefined, debugConfig);
+ } else {
+ throw new Error(`Unsupported Uri path: ${uri.path}`);
+ }
+ } catch (err) {
+ if (err instanceof Error) {
+ await vscode.window.showErrorMessage(
+ `Failed to handle lldb-dap URI request: ${err.message}`,
+ );
+ } else {
+ await vscode.window.showErrorMessage(
+ `Failed to handle lldb-dap URI request: ${JSON.stringify(err)}`,
+ );
+ }
}
+ }
}
>From 39beeb8eaa62506013df235497def6cc15ef362a Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro at gcc.gnu.org>
Date: Wed, 7 May 2025 20:59:42 +0200
Subject: [PATCH 032/115] [OpenMP] Fix __kmp_unnamed_critical_addr .type
setting (#138815)
PR #138517 broke the Android LLVM builders: ARM doesn't understand the
`@object` form. As it turns out, one can use `%object` instead, which
does assemble on all targets currently supported by `z_Linux_asm.S`.
Tested by rebuilding `libomp.so` on `sparcv9-sun-solaris2.11`.
---
openmp/runtime/src/z_Linux_asm.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index eba7bab371df3..de422f8327a26 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -2482,7 +2482,7 @@ __kmp_invoke_microtask:
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.4byte .gomp_critical_user_
#ifdef __ELF__
- .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr), at object
+ .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
#endif
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32 */
@@ -2501,7 +2501,7 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.8byte .gomp_critical_user_
#ifdef __ELF__
- .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr), at object
+ .type KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),%object
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
>From 74c0422cfeac42c83b82a3ff5c0c0cde849bd240 Mon Sep 17 00:00:00 2001
From: jeremyd2019 <github at jdrake.com>
Date: Wed, 7 May 2025 12:17:54 -0700
Subject: [PATCH 033/115] [Clang][CMake] use CMakePushCheckState (#138783)
The previous approach of using list(REMOVE_ITEM ...) would remove *all*
occurrences of the given item, not just the one appended above.
---
clang/CMakeLists.txt | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index c3f30e2a8e9c0..f12712f55fb96 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -183,18 +183,17 @@ check_include_file(sys/resource.h CLANG_HAVE_RLIMITS)
# This check requires _GNU_SOURCE on linux
check_include_file(dlfcn.h CLANG_HAVE_DLFCN_H)
if( CLANG_HAVE_DLFCN_H )
+ include(CMakePushCheckState)
include(CheckLibraryExists)
include(CheckSymbolExists)
check_library_exists(dl dlopen "" HAVE_LIBDL)
+ cmake_push_check_state()
if( HAVE_LIBDL )
list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
endif()
list(APPEND CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
check_symbol_exists(dladdr dlfcn.h CLANG_HAVE_DLADDR)
- list(REMOVE_ITEM CMAKE_REQUIRED_DEFINITIONS -D_GNU_SOURCE)
- if( HAVE_LIBDL )
- list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
- endif()
+ cmake_pop_check_state()
endif()
set(CLANG_RESOURCE_DIR "" CACHE STRING
>From fc281e1b4fcd32f78ed202fbdc92c1816a80e078 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Thu, 8 May 2025 03:20:43 +0800
Subject: [PATCH 034/115] [libc++][docs] Confirm that P3136R1 Retiring
niebloids is Complete (#135932)
libc++ has already been implementing niebloids as CPOs since LLVM 14, due to
https://reviews.llvm.org/D116570.
Also changes some comments in test files to use the formal term
"algorithm function object".
Closes #118133.
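As a small illustration of what the completed status means in practice
(standard C++20, nothing libc++-specific assumed): because the ranges
algorithms are function objects, they can be passed around like any other
callable, which is exactly what P3136R1 now guarantees.

  #include <algorithm>
  #include <cassert>
  #include <vector>

  // Accepts any callable, including an algorithm function object.
  template <class Range, class Algorithm>
  void run_algorithm(Range& r, Algorithm alg) { alg(r); }

  int main() {
    std::vector<int> v{3, 1, 2};
    run_algorithm(v, std::ranges::sort); // portable now that niebloids are CPOs
    assert(std::ranges::is_sorted(v));
    return 0;
  }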
---
libcxx/docs/Status/Cxx2cPapers.csv | 2 +-
.../ranges_robust_against_omitting_invoke.pass.cpp | 2 +-
.../niebloid.compile.pass.cpp | 13 ++++++++-----
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv
index 0ff1e257ce7f7..e23000230fbca 100644
--- a/libcxx/docs/Status/Cxx2cPapers.csv
+++ b/libcxx/docs/Status/Cxx2cPapers.csv
@@ -76,7 +76,7 @@
"`P2422R1 <https://wg21.link/P2422R1>`__","Remove ``nodiscard`` annotations from the standard library specification","2024-06 (St. Louis)","|Complete|","19","``nodiscard`` attributes were kept as a conforming extension"
"`P2300R10 <https://wg21.link/P2300R10>`__","``std::execution``","2024-06 (St. Louis)","","",""
"","","","","",""
-"`P3136R1 <https://wg21.link/P3136R1>`__","Retiring niebloids","2024-11 (WrocÅaw)","","",""
+"`P3136R1 <https://wg21.link/P3136R1>`__","Retiring niebloids","2024-11 (WrocÅaw)","|Complete|","14",""
"`P3138R5 <https://wg21.link/P3138R5>`__","``views::cache_latest``","2024-11 (WrocÅaw)","","",""
"`P3379R0 <https://wg21.link/P3379R0>`__","Constrain ``std::expected`` equality operators","2024-11 (WrocÅaw)","","",""
"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (WrocÅaw)","","",""
diff --git a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
index ca87f6e6984f4..a9750319e740c 100644
--- a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
+++ b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
@@ -35,7 +35,7 @@ struct Bar {
Bar create() const { return Bar(); }
};
-// Invokes both the (iterator, sentinel, ...) and the (range, ...) overloads of the given niebloid.
+// Invokes both the (iterator, sentinel, ...) and the (range, ...) overloads of the given algorithm function object.
// (in, ...)
template <class Func, std::ranges::range Input, class... Args>
diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
index 402bc1c9351aa..dc9134f3deb3c 100644
--- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
@@ -9,6 +9,7 @@
// UNSUPPORTED: c++03, c++11, c++14, c++17
// REQUIRES: stdlib=libc++
+// [alg.func.obj]
// [algorithms.requirements]/2
// [range.iter.ops.general]/2
@@ -24,12 +25,14 @@
#include "test_macros.h"
-// Niebloids, unlike CPOs, are *not* required to be semiregular or even to have
-// a declared type at all; they are specified as "magic" overload sets whose
-// names are not found by argument-dependent lookup and which inhibit
-// argument-dependent lookup if they are found via a `using`-declaration.
+// Before P3136R1, niebloids were pedantically not CPOs, and they were *not* required to be semiregular or
+// even to have a declared type at all; they were specified as "magic" overload sets
+// whose names are not found by argument-dependent lookup and
+// which inhibit argument-dependent lookup if they are found via a `using`-declaration.
//
-// libc++ implements them using the same function-object technique we use for CPOs;
+// As of P3136R1, niebloids (formally known as algorithm function objects) are required to be CPOs.
+//
+// libc++ implements niebloids in the same way as CPOs since LLVM 14;
// therefore this file should stay in sync with ./cpo.compile.pass.cpp.
template <class CPO, class... Args>
>From 13c464be84d9715f0825387f30e455eea7ef75f7 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 8 May 2025 03:25:00 +0800
Subject: [PATCH 035/115] [libc++] Implement P3379R0 Constrain `std::expected`
equality operators (#135759)
Closes #118135
Co-authored-by: A. Jiang <de34 at live.cn>
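A brief sketch of the user-visible effect (assumes a C++26-mode build of a
libc++ that carries this patch; the helper concept and type below are
illustrative, not the ones from the test suite):

  #include <expected>

  struct NoEq {}; // no operator== at all

  template <class T, class U>
  concept CanEq = requires(const T& t, const U& u) { t == u; };

  // The underlying comparisons are valid, so the operators participate.
  static_assert(CanEq<std::expected<int, int>, int>);
  static_assert(CanEq<std::expected<int, int>, std::expected<int, long>>);

  // Before P3379R0 the unconstrained operator== made the expression appear
  // valid (and actually using it was a hard error); with the new constraints
  // the overload drops out and the check below is simply false.
  static_assert(!CanEq<std::expected<int, int>, NoEq>);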
---
libcxx/docs/Status/Cxx2cPapers.csv | 2 +-
libcxx/include/__expected/expected.h | 42 ++++++++++++++++---
.../__type_traits/is_core_convertible.h | 7 ++++
.../equality/equality.T2.pass.cpp | 17 ++++----
.../equality/equality.other_expected.pass.cpp | 22 ++++++----
.../equality/equality.unexpected.pass.cpp | 17 ++++----
.../equality/equality.other_expected.pass.cpp | 15 +++++--
.../equality/equality.unexpected.pass.cpp | 17 ++++----
libcxx/test/std/utilities/expected/types.h | 13 ++++++
9 files changed, 110 insertions(+), 42 deletions(-)
diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv
index e23000230fbca..3809446a57896 100644
--- a/libcxx/docs/Status/Cxx2cPapers.csv
+++ b/libcxx/docs/Status/Cxx2cPapers.csv
@@ -78,7 +78,7 @@
"","","","","",""
"`P3136R1 <https://wg21.link/P3136R1>`__","Retiring niebloids","2024-11 (WrocÅaw)","|Complete|","14",""
"`P3138R5 <https://wg21.link/P3138R5>`__","``views::cache_latest``","2024-11 (WrocÅaw)","","",""
-"`P3379R0 <https://wg21.link/P3379R0>`__","Constrain ``std::expected`` equality operators","2024-11 (WrocÅaw)","","",""
+"`P3379R0 <https://wg21.link/P3379R0>`__","Constrain ``std::expected`` equality operators","2024-11 (WrocÅaw)","|Complete|","21",""
"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (WrocÅaw)","","",""
"`P2897R7 <https://wg21.link/P2897R7>`__","``aligned_accessor``: An ``mdspan`` accessor expressing pointer over-alignment","2024-11 (WrocÅaw)","|Complete|","21",""
"`P3355R1 <https://wg21.link/P3355R1>`__","Fix ``submdspan`` for C++26","2024-11 (WrocÅaw)","","",""
diff --git a/libcxx/include/__expected/expected.h b/libcxx/include/__expected/expected.h
index 03bbd1623ed5c..6b3d335f2151c 100644
--- a/libcxx/include/__expected/expected.h
+++ b/libcxx/include/__expected/expected.h
@@ -25,6 +25,7 @@
#include <__type_traits/is_assignable.h>
#include <__type_traits/is_constructible.h>
#include <__type_traits/is_convertible.h>
+#include <__type_traits/is_core_convertible.h>
#include <__type_traits/is_function.h>
#include <__type_traits/is_nothrow_assignable.h>
#include <__type_traits/is_nothrow_constructible.h>
@@ -1139,8 +1140,15 @@ class expected : private __expected_base<_Tp, _Err> {
// [expected.object.eq], equality operators
template <class _T2, class _E2>
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y)
requires(!is_void_v<_T2>)
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) {
+# if _LIBCPP_STD_VER >= 26
+ && requires {
+ { *__x == *__y } -> __core_convertible_to<bool>;
+ { __x.error() == __y.error() } -> __core_convertible_to<bool>;
+ }
+# endif
+ {
if (__x.__has_val() != __y.__has_val()) {
return false;
} else {
@@ -1153,12 +1161,24 @@ class expected : private __expected_base<_Tp, _Err> {
}
template <class _T2>
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const _T2& __v) {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const _T2& __v)
+# if _LIBCPP_STD_VER >= 26
+ requires(!__is_std_expected<_T2>::value) && requires {
+ { *__x == __v } -> __core_convertible_to<bool>;
+ }
+# endif
+ {
return __x.__has_val() && static_cast<bool>(__x.__val() == __v);
}
template <class _E2>
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __e) {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __e)
+# if _LIBCPP_STD_VER >= 26
+ requires requires {
+ { __x.error() == __e.error() } -> __core_convertible_to<bool>;
+ }
+# endif
+ {
return !__x.__has_val() && static_cast<bool>(__x.__unex() == __e.error());
}
};
@@ -1851,7 +1871,13 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> {
// [expected.void.eq], equality operators
template <class _T2, class _E2>
requires is_void_v<_T2>
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y) {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y)
+# if _LIBCPP_STD_VER >= 26
+ requires requires {
+ { __x.error() == __y.error() } -> __core_convertible_to<bool>;
+ }
+# endif
+ {
if (__x.__has_val() != __y.__has_val()) {
return false;
} else {
@@ -1860,7 +1886,13 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> {
}
template <class _E2>
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __y) {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const expected& __x, const unexpected<_E2>& __y)
+# if _LIBCPP_STD_VER >= 26
+ requires requires {
+ { __x.error() == __y.error() } -> __core_convertible_to<bool>;
+ }
+# endif
+ {
return !__x.__has_val() && static_cast<bool>(__x.__unex() == __y.error());
}
};
diff --git a/libcxx/include/__type_traits/is_core_convertible.h b/libcxx/include/__type_traits/is_core_convertible.h
index 93e23d24d6624..ca3a346c17cd7 100644
--- a/libcxx/include/__type_traits/is_core_convertible.h
+++ b/libcxx/include/__type_traits/is_core_convertible.h
@@ -30,6 +30,13 @@ template <class _Tp, class _Up>
struct __is_core_convertible<_Tp, _Up, decltype(static_cast<void (*)(_Up)>(0)(static_cast<_Tp (*)()>(0)()))>
: true_type {};
+#if _LIBCPP_STD_VER >= 20
+
+template <class _Tp, class _Up>
+concept __core_convertible_to = __is_core_convertible<_Tp, _Up>::value;
+
+#endif // _LIBCPP_STD_VER >= 20
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___TYPE_TRAITS_IS_CORE_CONVERTIBLE_H
diff --git a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.T2.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.T2.pass.cpp
index bc8b9de97e4d2..25eb97a2df4d3 100644
--- a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.T2.pass.cpp
+++ b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.T2.pass.cpp
@@ -17,18 +17,19 @@
#include <utility>
#include "test_macros.h"
+#include "../../types.h"
-struct Data {
- int i;
- constexpr Data(int ii) : i(ii) {}
-
- friend constexpr bool operator==(const Data& data, int ii) { return data.i == ii; }
-};
+#if TEST_STD_VER >= 26
+// https://wg21.link/P3379R0
+static_assert(CanCompare<std::expected<int, int>, int>);
+static_assert(CanCompare<std::expected<int, int>, EqualityComparable>);
+static_assert(!CanCompare<std::expected<int, int>, NonComparable>);
+#endif
constexpr bool test() {
// x.has_value()
{
- const std::expected<Data, int> e1(std::in_place, 5);
+ const std::expected<EqualityComparable, int> e1(std::in_place, 5);
int i2 = 10;
int i3 = 5;
assert(e1 != i2);
@@ -37,7 +38,7 @@ constexpr bool test() {
// !x.has_value()
{
- const std::expected<Data, int> e1(std::unexpect, 5);
+ const std::expected<EqualityComparable, int> e1(std::unexpect, 5);
int i2 = 10;
int i3 = 5;
assert(e1 != i2);
diff --git a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.other_expected.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.other_expected.pass.cpp
index 9325c6c61ad2d..f0f549b6b7772 100644
--- a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.other_expected.pass.cpp
+++ b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.other_expected.pass.cpp
@@ -18,20 +18,26 @@
#include <utility>
#include "test_macros.h"
+#include "../../types.h"
// Test constraint
-template <class T1, class T2>
-concept CanCompare = requires(T1 t1, T2 t2) { t1 == t2; };
-
-struct Foo{};
-static_assert(!CanCompare<Foo, Foo>);
+static_assert(!CanCompare<NonComparable, NonComparable>);
static_assert(CanCompare<std::expected<int, int>, std::expected<int, int>>);
static_assert(CanCompare<std::expected<int, int>, std::expected<short, short>>);
-// Note this is true because other overloads are unconstrained
-static_assert(CanCompare<std::expected<int, int>, std::expected<void, int>>);
-
+#if TEST_STD_VER >= 26
+// https://wg21.link/P3379R0
+static_assert(!CanCompare<std::expected<int, int>, std::expected<void, int>>);
+static_assert(CanCompare<std::expected<int, int>, std::expected<int, int>>);
+static_assert(!CanCompare<std::expected<NonComparable, int>, std::expected<NonComparable, int>>);
+static_assert(!CanCompare<std::expected<int, NonComparable>, std::expected<int, NonComparable>>);
+static_assert(!CanCompare<std::expected<NonComparable, int>, std::expected<int, NonComparable>>);
+static_assert(!CanCompare<std::expected<int, NonComparable>, std::expected<NonComparable, int>>);
+#else
+// Note this is true because other overloads in expected<non-void> are unconstrained
+static_assert(CanCompare<std::expected<void, int>, std::expected<int, int>>);
+#endif
constexpr bool test() {
// x.has_value() && y.has_value()
{
diff --git a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.unexpected.pass.cpp b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.unexpected.pass.cpp
index a8c469d01be28..6c7d2f39514e7 100644
--- a/libcxx/test/std/utilities/expected/expected.expected/equality/equality.unexpected.pass.cpp
+++ b/libcxx/test/std/utilities/expected/expected.expected/equality/equality.unexpected.pass.cpp
@@ -17,18 +17,19 @@
#include <utility>
#include "test_macros.h"
+#include "../../types.h"
-struct Data {
- int i;
- constexpr Data(int ii) : i(ii) {}
-
- friend constexpr bool operator==(const Data& data, int ii) { return data.i == ii; }
-};
+#if TEST_STD_VER >= 26
+// https://wg21.link/P3379R0
+static_assert(CanCompare<std::expected<EqualityComparable, EqualityComparable>, std::unexpected<int>>);
+static_assert(CanCompare<std::expected<EqualityComparable, int>, std::unexpected<EqualityComparable>>);
+static_assert(!CanCompare<std::expected<EqualityComparable, NonComparable>, std::unexpected<int>>);
+#endif
constexpr bool test() {
// x.has_value()
{
- const std::expected<Data, Data> e1(std::in_place, 5);
+ const std::expected<EqualityComparable, EqualityComparable> e1(std::in_place, 5);
std::unexpected<int> un2(10);
std::unexpected<int> un3(5);
assert(e1 != un2);
@@ -37,7 +38,7 @@ constexpr bool test() {
// !x.has_value()
{
- const std::expected<Data, Data> e1(std::unexpect, 5);
+ const std::expected<EqualityComparable, EqualityComparable> e1(std::unexpect, 5);
std::unexpected<int> un2(10);
std::unexpected<int> un3(5);
assert(e1 != un2);
diff --git a/libcxx/test/std/utilities/expected/expected.void/equality/equality.other_expected.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/equality/equality.other_expected.pass.cpp
index 8b24875586852..b6c3d8deee644 100644
--- a/libcxx/test/std/utilities/expected/expected.void/equality/equality.other_expected.pass.cpp
+++ b/libcxx/test/std/utilities/expected/expected.void/equality/equality.other_expected.pass.cpp
@@ -18,10 +18,7 @@
#include <utility>
#include "test_macros.h"
-
-// Test constraint
-template <class T1, class T2>
-concept CanCompare = requires(T1 t1, T2 t2) { t1 == t2; };
+#include "../../types.h"
struct Foo{};
static_assert(!CanCompare<Foo, Foo>);
@@ -29,8 +26,18 @@ static_assert(!CanCompare<Foo, Foo>);
static_assert(CanCompare<std::expected<void, int>, std::expected<void, int>>);
static_assert(CanCompare<std::expected<void, int>, std::expected<void, short>>);
+#if TEST_STD_VER >= 26
+// https://wg21.link/P3379R0
+static_assert(!CanCompare<std::expected<void, int>, std::expected<int, int>>);
+static_assert(CanCompare<std::expected<void, int>, std::expected<void, int>>);
+static_assert(CanCompare<std::expected<void, int>, std::expected<void, int>>);
+static_assert(!CanCompare<std::expected<void, NonComparable>, std::expected<void, NonComparable>>);
+static_assert(!CanCompare<std::expected<void, int>, std::expected<void, NonComparable>>);
+static_assert(!CanCompare<std::expected<void, NonComparable>, std::expected<void, int>>);
+#else
// Note this is true because other overloads in expected<non-void> are unconstrained
static_assert(CanCompare<std::expected<void, int>, std::expected<int, int>>);
+#endif
constexpr bool test() {
// x.has_value() && y.has_value()
diff --git a/libcxx/test/std/utilities/expected/expected.void/equality/equality.unexpected.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/equality/equality.unexpected.pass.cpp
index 4500971131b65..f37f38bb71512 100644
--- a/libcxx/test/std/utilities/expected/expected.void/equality/equality.unexpected.pass.cpp
+++ b/libcxx/test/std/utilities/expected/expected.void/equality/equality.unexpected.pass.cpp
@@ -17,18 +17,19 @@
#include <utility>
#include "test_macros.h"
+#include "../../types.h"
-struct Data {
- int i;
- constexpr Data(int ii) : i(ii) {}
-
- friend constexpr bool operator==(const Data& data, int ii) { return data.i == ii; }
-};
+#if TEST_STD_VER >= 26
+// https://wg21.link/P3379R0
+static_assert(CanCompare<std::expected<void, EqualityComparable>, std::unexpected<int>>);
+static_assert(CanCompare<std::expected<void, int>, std::unexpected<EqualityComparable>>);
+static_assert(!CanCompare<std::expected<void, NonComparable>, std::unexpected<int>>);
+#endif
constexpr bool test() {
// x.has_value()
{
- const std::expected<void, Data> e1;
+ const std::expected<void, EqualityComparable> e1;
std::unexpected<int> un2(10);
std::unexpected<int> un3(5);
assert(e1 != un2);
@@ -37,7 +38,7 @@ constexpr bool test() {
// !x.has_value()
{
- const std::expected<void, Data> e1(std::unexpect, 5);
+ const std::expected<void, EqualityComparable> e1(std::unexpect, 5);
std::unexpected<int> un2(10);
std::unexpected<int> un3(5);
assert(e1 != un2);
diff --git a/libcxx/test/std/utilities/expected/types.h b/libcxx/test/std/utilities/expected/types.h
index df73ebdfe495e..11473ca3d97de 100644
--- a/libcxx/test/std/utilities/expected/types.h
+++ b/libcxx/test/std/utilities/expected/types.h
@@ -336,4 +336,17 @@ struct CheckForInvalidWrites : public CheckForInvalidWritesBase<WithPaddedExpect
}
};
+struct NonComparable {};
+
+struct EqualityComparable {
+ int i;
+ constexpr EqualityComparable(int ii) : i(ii) {}
+
+ friend constexpr bool operator==(const EqualityComparable& data, int ii) { return data.i == ii; }
+};
+
+// Test constraint
+template <class T1, class T2>
+concept CanCompare = requires(T1 t1, T2 t2) { t1 == t2; };
+
#endif // TEST_STD_UTILITIES_EXPECTED_TYPES_H
>From 93aba1e240dbf8fa8f71cbc05dcae2dc1498c2dd Mon Sep 17 00:00:00 2001
From: Ryan Prichard <rprichard at google.com>
Date: Wed, 7 May 2025 12:27:19 -0700
Subject: [PATCH 036/115] [libc++][Android] Disable fdsan in filebuf
close.pass.cpp (#102412)
fdsan is Bionic's "File Descriptor Sanitizer". Starting with API level 30, it
aborts this close.pass.cpp test because the test closes the FD belonging to
std::filebuf's FILE*. For `__BIONIC__`, disable that part of the test.
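For readers unfamiliar with fdsan, a rough standalone sketch of the behaviour
being worked around (this is not the libc++ test; the file path is
illustrative only):

```cpp
#include <cstdio>
#include <unistd.h>

int main() {
  // Hypothetical file path; any writable location works.
  std::FILE *fp = std::fopen("/data/local/tmp/example.txt", "w");
  if (!fp)
    return 1;
  int fd = fileno(fp);
  // On Android API 30+, Bionic's fdsan tags this fd as owned by the FILE*.
  // Closing it directly instead of through fclose(fp) trips fdsan and the
  // process aborts; elsewhere this is merely sloppy (fclose then sees EBADF).
  close(fd);
  std::fclose(fp);
  return 0;
}
```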
---
.../fstreams/filebuf.members/close.pass.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
index e0338e6f619b7..43233decf1b34 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
@@ -10,11 +10,6 @@
// basic_filebuf<charT,traits>* close();
-// This test closes an fd that belongs to a std::filebuf, and Bionic's fdsan
-// detects this and aborts the process, starting in Android R (API 30).
-// See D137129.
-// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{2[1-9]}}
-
#include <fstream>
#include <cassert>
#if defined(__unix__)
@@ -37,7 +32,10 @@ int main(int, char**)
assert(f.close() == nullptr);
assert(!f.is_open());
}
-#if defined(__unix__)
+ // Starting with Android API 30+, Bionic's fdsan aborts a process that calls
+ // close() on a file descriptor tagged as belonging to something else (such
+ // as a FILE*).
+#if defined(__unix__) && !defined(__BIONIC__)
{
std::filebuf f;
assert(!f.is_open());
>From 16107c88fb6cc474e1e5691025eb295df094a6c2 Mon Sep 17 00:00:00 2001
From: jeremyd2019 <github at jdrake.com>
Date: Wed, 7 May 2025 12:28:11 -0700
Subject: [PATCH 037/115] [LLD][CMake] fix testing standalone build without
installed llvm-lit. (#138575)
This block was copied from clang/CMakeLists.txt.
---
lld/CMakeLists.txt | 3 +++
1 file changed, 3 insertions(+)
diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt
index 55d7599a447fc..9b202cc5d4899 100644
--- a/lld/CMakeLists.txt
+++ b/lld/CMakeLists.txt
@@ -63,6 +63,9 @@ if(LLD_BUILT_STANDALONE)
if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
# Note: path not really used, except for checking if lit was found
set(LLVM_EXTERNAL_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
+ if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit)
+ add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit utils/llvm-lit)
+ endif()
if(NOT LLVM_UTILS_PROVIDED)
add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/FileCheck utils/FileCheck)
add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/not utils/not)
>From f74d893987dd72c6a7dc6b4f219877204a7681f4 Mon Sep 17 00:00:00 2001
From: Andres-Salamanca <andrealebarbaritos at gmail.com>
Date: Wed, 7 May 2025 14:28:35 -0500
Subject: [PATCH 038/115] [CIR] Upstream support for switch statement case
 kinds (#138003)
This introduces support for the following cir::case kinds:
- `Equal`
- `AnyOf`
- `Range`
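For context, a hypothetical C++ input (not taken from the tests below) showing
which source constructs map to those kinds; the CIR actually emitted is checked
in clang/test/CIR/CodeGen/switch.cpp:

```cpp
void classify(int v) {
  switch (v) {
  case 1:         // a single label lowers to cir.case(equal, ...)
  case 2:         // adjacent labels are candidates for cir.case(anyof, ...);
                  // folding them is still deferred per the TODOs in the patch
    break;
  case 10 ... 20: // a GNU case range lowers to cir.case(range, ...)
    break;
  default:        // lowers to cir.case(default, [])
    break;
  }
}
```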
---
clang/include/clang/CIR/MissingFeatures.h | 5 +-
clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 +
clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 51 ++-
clang/test/CIR/CodeGen/switch.cpp | 425 ++++++++++++++++++++--
4 files changed, 442 insertions(+), 43 deletions(-)
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 06636cd6c554c..fb4d8b1c2de2b 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -111,6 +111,10 @@ struct MissingFeatures {
// Unary operator handling
static bool opUnaryPromotionType() { return false; }
+ // SwitchOp handling
+ static bool foldCascadingCases() { return false; }
+ static bool foldRangeCase() { return false; }
+
// Clang early optimizations or things defered to LLVM lowering.
static bool mayHaveIntegerOverflow() { return false; }
static bool shouldReverseUnaryCondOnBoolExpr() { return false; }
@@ -176,7 +180,6 @@ struct MissingFeatures {
static bool targetSpecificCXXABI() { return false; }
static bool moduleNameHash() { return false; }
static bool setDSOLocal() { return false; }
- static bool foldCaseStmt() { return false; }
static bool constantFoldSwitchStatement() { return false; }
static bool cudaSupport() { return false; }
static bool maybeHandleStaticInExternC() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index c3aada89c09c4..9066107af595e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -581,6 +581,10 @@ class CIRGenFunction : public CIRGenTypeCache {
mlir::LogicalResult emitDeclStmt(const clang::DeclStmt &s);
LValue emitDeclRefLValue(const clang::DeclRefExpr *e);
+ mlir::LogicalResult emitDefaultStmt(const clang::DefaultStmt &s,
+ mlir::Type condType,
+ bool buildingTopLevelCase);
+
/// Emit an `if` on a boolean condition to the specified blocks.
/// FIXME: Based on the condition, this might try to simplify the codegen of
/// the conditional based on the branch.
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index ee4dcc861a1f2..cc96e65e4ce1d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -254,6 +254,7 @@ mlir::LogicalResult CIRGenFunction::emitSimpleStmt(const Stmt *s,
case Stmt::NullStmtClass:
break;
case Stmt::CaseStmtClass:
+ case Stmt::DefaultStmtClass:
// If we reached here, we must not handling a switch case in the top level.
return emitSwitchCase(cast<SwitchCase>(*s),
/*buildingTopLevelCase=*/false);
@@ -458,7 +459,7 @@ CIRGenFunction::emitCaseDefaultCascade(const T *stmt, mlir::Type condType,
if (isa<DefaultStmt>(sub) && isa<CaseStmt>(stmt)) {
subStmtKind = SubStmtKind::Default;
builder.createYield(loc);
- } else if (isa<CaseStmt>(sub) && isa<DefaultStmt>(stmt)) {
+ } else if (isa<CaseStmt>(sub) && isa<DefaultStmt, CaseStmt>(stmt)) {
subStmtKind = SubStmtKind::Case;
builder.createYield(loc);
} else {
@@ -503,8 +504,8 @@ CIRGenFunction::emitCaseDefaultCascade(const T *stmt, mlir::Type condType,
if (subStmtKind == SubStmtKind::Case) {
result = emitCaseStmt(*cast<CaseStmt>(sub), condType, buildingTopLevelCase);
} else if (subStmtKind == SubStmtKind::Default) {
- getCIRGenModule().errorNYI(sub->getSourceRange(), "Default case");
- return mlir::failure();
+ result = emitDefaultStmt(*cast<DefaultStmt>(sub), condType,
+ buildingTopLevelCase);
} else if (buildingTopLevelCase) {
// If we're building a top level case, try to restore the insert point to
// the case we're building, then we can attach more random stmts to the
@@ -518,19 +519,40 @@ CIRGenFunction::emitCaseDefaultCascade(const T *stmt, mlir::Type condType,
mlir::LogicalResult CIRGenFunction::emitCaseStmt(const CaseStmt &s,
mlir::Type condType,
bool buildingTopLevelCase) {
+ cir::CaseOpKind kind;
+ mlir::ArrayAttr value;
llvm::APSInt intVal = s.getLHS()->EvaluateKnownConstInt(getContext());
- SmallVector<mlir::Attribute, 1> caseEltValueListAttr;
- caseEltValueListAttr.push_back(cir::IntAttr::get(condType, intVal));
- mlir::ArrayAttr value = builder.getArrayAttr(caseEltValueListAttr);
- if (s.getRHS()) {
- getCIRGenModule().errorNYI(s.getSourceRange(), "SwitchOp range kind");
- return mlir::failure();
+
+ // If the case statement has an RHS value, it is representing a GNU
+ // case range statement, where LHS is the beginning of the range
+ // and RHS is the end of the range.
+ if (const Expr *rhs = s.getRHS()) {
+ llvm::APSInt endVal = rhs->EvaluateKnownConstInt(getContext());
+ value = builder.getArrayAttr({cir::IntAttr::get(condType, intVal),
+ cir::IntAttr::get(condType, endVal)});
+ kind = cir::CaseOpKind::Range;
+
+ // We don't currently fold case range statements with other case statements.
+ // TODO(cir): Add this capability. Folding these cases is going to be
+ // implemented in CIRSimplify when it is upstreamed.
+ assert(!cir::MissingFeatures::foldRangeCase());
+ assert(!cir::MissingFeatures::foldCascadingCases());
+ } else {
+ value = builder.getArrayAttr({cir::IntAttr::get(condType, intVal)});
+ kind = cir::CaseOpKind::Equal;
}
- assert(!cir::MissingFeatures::foldCaseStmt());
- return emitCaseDefaultCascade(&s, condType, value, cir::CaseOpKind::Equal,
+
+ return emitCaseDefaultCascade(&s, condType, value, kind,
buildingTopLevelCase);
}
+mlir::LogicalResult CIRGenFunction::emitDefaultStmt(const clang::DefaultStmt &s,
+ mlir::Type condType,
+ bool buildingTopLevelCase) {
+ return emitCaseDefaultCascade(&s, condType, builder.getArrayAttr({}),
+ cir::CaseOpKind::Default, buildingTopLevelCase);
+}
+
mlir::LogicalResult CIRGenFunction::emitSwitchCase(const SwitchCase &s,
bool buildingTopLevelCase) {
assert(!condTypeStack.empty() &&
@@ -540,10 +562,9 @@ mlir::LogicalResult CIRGenFunction::emitSwitchCase(const SwitchCase &s,
return emitCaseStmt(cast<CaseStmt>(s), condTypeStack.back(),
buildingTopLevelCase);
- if (s.getStmtClass() == Stmt::DefaultStmtClass) {
- getCIRGenModule().errorNYI(s.getSourceRange(), "Default case");
- return mlir::failure();
- }
+ if (s.getStmtClass() == Stmt::DefaultStmtClass)
+ return emitDefaultStmt(cast<DefaultStmt>(s), condTypeStack.back(),
+ buildingTopLevelCase);
llvm_unreachable("expect case or default stmt");
}
diff --git a/clang/test/CIR/CodeGen/switch.cpp b/clang/test/CIR/CodeGen/switch.cpp
index 36523755376a1..0bd4e0759e634 100644
--- a/clang/test/CIR/CodeGen/switch.cpp
+++ b/clang/test/CIR/CodeGen/switch.cpp
@@ -16,8 +16,9 @@ void sw1(int a) {
}
}
}
+
// CIR: cir.func @_Z3sw1i
-// CIR: cir.switch (%3 : !s32i) {
+// CIR: cir.switch (%[[COND:.*]] : !s32i) {
// CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) {
// CIR: cir.break
// CIR: cir.case(equal, [#cir.int<1> : !s32i]) {
@@ -66,12 +67,12 @@ void sw2(int a) {
// CIR: cir.func @_Z3sw2i
// CIR: cir.scope {
-// CIR-NEXT: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init]
-// CIR-NEXT: %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["fomo", init]
-// CIR: cir.switch (%4 : !s32i) {
+// CIR-NEXT: %[[YOLO:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init]
+// CIR-NEXT: %[[FOMO:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["fomo", init]
+// CIR: cir.switch (%[[COND:.*]] : !s32i) {
// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
-// CIR-NEXT: %5 = cir.const #cir.int<0> : !s32i
-// CIR-NEXT: cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: cir.store %[[ZERO]], %[[FOMO]] : !s32i, !cir.ptr<!s32i>
// OGCG: define dso_local void @_Z3sw2i
// OGCG: entry:
@@ -91,45 +92,80 @@ void sw2(int a) {
// OGCG: [[SW_EPILOG]]:
// OGCG: ret void
+void sw3(int a) {
+ switch (a) {
+ default:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z3sw3i
+// CIR: cir.scope {
+// CIR-NEXT: %[[COND:.*]] = cir.load %[[A:.*]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.switch (%[[COND]] : !s32i) {
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+
+// OGCG: define dso_local void @_Z3sw3i
+// OGCG: entry:
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
int sw4(int a) {
switch (a) {
case 42: {
return 3;
}
- // TODO: add default case when it is upstreamed
+ default:
+ return 2;
}
return 0;
}
// CIR: cir.func @_Z3sw4i
-// CIR: cir.switch (%4 : !s32i) {
+// CIR: cir.switch (%[[COND:.*]] : !s32i) {
// CIR-NEXT: cir.case(equal, [#cir.int<42> : !s32i]) {
// CIR-NEXT: cir.scope {
-// CIR-NEXT: %5 = cir.const #cir.int<3> : !s32i
-// CIR-NEXT: cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
-// CIR-NEXT: %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
-// CIR-NEXT: cir.return %6 : !s32i
+// CIR-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i
+// CIR-NEXT: cir.store %[[THREE]], %[[RETVAL:.*]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %[[RET3:.*]] = cir.load %[[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %[[RET3]] : !s32i
// CIR-NEXT: }
// CIR-NEXT: cir.yield
// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i
+// CIR-NEXT: cir.store %[[TWO]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %[[RET2:.*]] = cir.load %[[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %[[RET2]] : !s32i
+// CIR-NEXT: }
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
// OGCG: define dso_local noundef i32 @_Z3sw4i
// OGCG: entry:
// OGCG: %[[RETVAL:.*]] = alloca i32, align 4
// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
-// OGCG: switch i32 %[[A_VAL]], label %[[EPILOG:.*]] [
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
// OGCG: i32 42, label %[[SW42:.*]]
// OGCG: ]
// OGCG: [[SW42]]:
// OGCG: br label %[[RETURN:.*]]
-// OGCG: [[EPILOG]]:
+// OGCG: [[DEFAULT]]:
// OGCG: br label %[[RETURN]]
// OGCG: [[RETURN]]:
// OGCG: %[[RETVAL_LOAD:.*]] = load i32, ptr %[[RETVAL]], align 4
// OGCG: ret i32 %[[RETVAL_LOAD]]
-
void sw5(int a) {
switch (a) {
case 1:;
@@ -137,7 +173,7 @@ void sw5(int a) {
}
// CIR: cir.func @_Z3sw5i
-// CIR: cir.switch (%1 : !s32i) {
+// CIR: cir.switch (%[[A:.*]] : !s32i) {
// CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) {
// CIR-NEXT: cir.yield
// CIR-NEXT: }
@@ -156,22 +192,138 @@ void sw5(int a) {
// OGCG: [[SW_EPILOG]]:
// OGCG: ret void
+void sw6(int a) {
+ switch (a) {
+ case 0:
+ case 1:
+ case 2:
+ break;
+ case 3:
+ case 4:
+ case 5:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z3sw6i
+// CIR: cir.switch (%[[A:.*]] : !s32i) {
+// CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<2> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+
+
+// OGCG: define dso_local void @_Z3sw6i
+// OGCG: entry:
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: store i32 %a, ptr %[[A_ADDR]], align 4
+// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: switch i32 %[[A_VAL]], label %[[EPILOG:.*]] [
+// OGCG: i32 0, label %[[BB0:.*]]
+// OGCG: i32 1, label %[[BB0]]
+// OGCG: i32 2, label %[[BB0]]
+// OGCG: i32 3, label %[[BB1:.*]]
+// OGCG: i32 4, label %[[BB1]]
+// OGCG: i32 5, label %[[BB1]]
+// OGCG: ]
+// OGCG: [[BB0]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[BB1]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
+void sw7(int a) {
+ switch (a) {
+ case 0:
+ case 1:
+ case 2:
+ int x;
+ case 3:
+ case 4:
+ case 5:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z3sw7i
+// CIR: %[[X:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x"]
+// CIR: cir.switch (%[[A:.*]] : !s32i)
+// CIR-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<2> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.yield
+// CIR: }
+
+// OGCG: define dso_local void @_Z3sw7i
+// OGCG: entry:
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: switch i32 %[[A_VAL]], label %[[EPILOG:.*]] [
+// OGCG: i32 0, label %[[BB0:.*]]
+// OGCG: i32 1, label %[[BB0]]
+// OGCG: i32 2, label %[[BB0]]
+// OGCG: i32 3, label %[[BB1:.*]]
+// OGCG: i32 4, label %[[BB1]]
+// OGCG: i32 5, label %[[BB1]]
+// OGCG: ]
+// OGCG: [[BB0]]:
+// OGCG: br label %[[BB1]]
+// OGCG: [[BB1]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
+
void sw8(int a) {
switch (a)
{
case 3:
break;
case 4:
- // TODO: add default case when it is upstreamed
+ default:
break;
}
}
// CIR: cir.func @_Z3sw8i
-// CIR: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR: cir.switch (%[[A:.*]] : !s32i)
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
// CIR-NEXT: cir.break
// CIR-NEXT: }
// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
// CIR-NEXT: cir.break
// CIR-NEXT: }
@@ -180,33 +332,38 @@ void sw8(int a) {
// OGCG: entry:
// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
-// OGCG: switch i32 %[[A_VAL]], label %[[EPILOG:.*]] [
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
// OGCG: i32 3, label %[[SW3:.*]]
// OGCG: i32 4, label %[[SW4:.*]]
// OGCG: ]
// OGCG: [[SW3]]:
-// OGCG: br label %[[EPILOG]]
+// OGCG: br label %[[EPILOG:.*]]
// OGCG: [[SW4]]:
+// OGCG: br label %[[DEFAULT]]
+// OGCG: [[DEFAULT]]:
// OGCG: br label %[[EPILOG]]
// OGCG: [[EPILOG]]:
// OGCG: ret void
-
void sw9(int a) {
switch (a)
{
case 3:
break;
- // TODO: add default case when it is upstreamed
+ default:
case 4:
break;
}
}
// CIR: cir.func @_Z3sw9i
-// CIR: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR: cir.switch (%[[A:.*]] : !s32i)
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
// CIR-NEXT: cir.break
// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
// CIR-NEXT: cir.break
// CIR-NEXT: }
@@ -215,17 +372,123 @@ void sw9(int a) {
// OGCG: entry:
// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
-// OGCG: switch i32 %[[A_VAL]], label %[[EPILOG:.*]] [
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
// OGCG: i32 3, label %[[SW3:.*]]
// OGCG: i32 4, label %[[SW4:.*]]
// OGCG: ]
// OGCG: [[SW3]]:
-// OGCG: br label %[[EPILOG]]
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[SW4]]
// OGCG: [[SW4]]:
// OGCG: br label %[[EPILOG]]
// OGCG: [[EPILOG]]:
// OGCG: ret void
+void sw10(int a) {
+ switch (a)
+ {
+ case 3:
+ break;
+ case 4:
+ default:
+ case 5:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z4sw10i
+// CIR: cir.switch (%[[A:.*]] : !s32i)
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+
+// OGCG: define dso_local void @_Z4sw10i
+// OGCG: entry:
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
+// OGCG: i32 3, label %[[BB3:.*]]
+// OGCG: i32 4, label %[[BB4:.*]]
+// OGCG: i32 5, label %[[BB5:.*]]
+// OGCG: ]
+// OGCG: [[BB3]]:
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[BB4]]:
+// OGCG: br label %[[DEFAULT]]
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[BB5]]
+// OGCG: [[BB5]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
+void sw11(int a) {
+ switch (a)
+ {
+ case 3:
+ break;
+ case 4:
+ case 5:
+ default:
+ case 6:
+ case 7:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z4sw11i
+// CIR: cir.switch (%[[A:.*]] : !s32i)
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<6> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<7> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+
+// OGCG: define dso_local void @_Z4sw11i
+// OGCG: entry:
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[A_VAL:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: switch i32 %[[A_VAL]], label %[[DEFAULT:.*]] [
+// OGCG: i32 3, label %[[BB3:.*]]
+// OGCG: i32 4, label %[[BB4:.*]]
+// OGCG: i32 5, label %[[BB4]]
+// OGCG: i32 6, label %[[BB6:.*]]
+// OGCG: i32 7, label %[[BB6]]
+// OGCG: ]
+// OGCG: [[BB3]]:
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[BB4]]:
+// OGCG: br label %[[DEFAULT]]
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[BB6]]
+// OGCG: [[BB6]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
void sw12(int a) {
switch (a)
{
@@ -278,7 +541,7 @@ void sw13(int a, int b) {
// CIR-NEXT: cir.yield
// CIR-NEXT: }
// CIR-NEXT: }
-// CIR: cir.yield
+// CIR: cir.yield
// CIR: }
// CIR: cir.return
@@ -302,6 +565,114 @@ void sw13(int a, int b) {
// OGCG: [[EPILOG2]]:
// OGCG: ret void
+void sw14(int x) {
+ switch (x) {
+ case 1:
+ case 2:
+ case 3 ... 6:
+ case 7:
+ break;
+ default:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z4sw14i
+// CIR: cir.switch
+// CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<2> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(range, [#cir.int<3> : !s32i, #cir.int<6> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<7> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+
+// OGCG: define dso_local void @_Z4sw14i
+// OGCG: entry:
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: store i32 %x, ptr %[[X_ADDR]], align 4
+// OGCG: %[[X_VAL:.*]] = load i32, ptr %[[X_ADDR]], align 4
+
+// OGCG: switch i32 %[[X_VAL]], label %[[DEFAULT:.*]] [
+// OGCG-DAG: i32 1, label %[[BB1:.*]]
+// OGCG-DAG: i32 2, label %[[BB1]]
+// OGCG-DAG: i32 3, label %[[BB2:.*]]
+// OGCG-DAG: i32 4, label %[[BB2]]
+// OGCG-DAG: i32 5, label %[[BB2]]
+// OGCG-DAG: i32 6, label %[[BB2]]
+// OGCG-DAG: i32 7, label %[[BB3:.*]]
+// OGCG: ]
+// OGCG: [[BB1]]:
+// OGCG: br label %[[BB2]]
+// OGCG: [[BB2]]:
+// OGCG: br label %[[BB3]]
+// OGCG: [[BB3]]:
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
+void sw15(int x) {
+ int y;
+ switch (x) {
+ case 1:
+ case 2:
+ y = 0;
+ case 3:
+ break;
+ default:
+ break;
+ }
+}
+
+// CIR: cir.func @_Z4sw15i
+// CIR: %[[Y:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y"]
+// CIR: cir.switch
+// CIR-NEXT: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<2> : !s32i]) {
+// CIR-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: cir.store %[[ZERO]], %[[Y]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: cir.yield
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+// CIR-NEXT: cir.case(default, []) {
+// CIR-NEXT: cir.break
+// CIR-NEXT: }
+
+// OGCG: define dso_local void @_Z4sw15i
+// OGCG: entry:
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[Y:.*]] = alloca i32, align 4
+// OGCG: store i32 %x, ptr %[[X_ADDR]], align 4
+// OGCG: %[[X_VAL:.*]] = load i32, ptr %[[X_ADDR]], align 4
+// OGCG: switch i32 %[[X_VAL]], label %[[DEFAULT:.*]] [
+// OGCG-DAG: i32 1, label %[[BB0:.*]]
+// OGCG-DAG: i32 2, label %[[BB0]]
+// OGCG-DAG: i32 3, label %[[BB1:.*]]
+// OGCG: ]
+// OGCG: [[BB0]]:
+// OGCG: store i32 0, ptr %[[Y]], align 4
+// OGCG: br label %[[BB1]]
+// OGCG: [[BB1]]:
+// OGCG: br label %[[EPILOG:.*]]
+// OGCG: [[DEFAULT]]:
+// OGCG: br label %[[EPILOG]]
+// OGCG: [[EPILOG]]:
+// OGCG: ret void
+
int nested_switch(int a) {
switch (int b = 1; a) {
case 0:
@@ -325,7 +696,7 @@ int nested_switch(int a) {
return 0;
}
-// CIR: cir.switch (%6 : !s32i) {
+// CIR: cir.switch (%[[COND:.*]] : !s32i) {
// CIR: cir.case(equal, [#cir.int<0> : !s32i]) {
// CIR: cir.yield
// CIR: }
>From db38cc27bc61cf2d53bcac1203722853610aa073 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 7 May 2025 12:32:08 -0700
Subject: [PATCH 039/115] [analyzer] Make it a noop when initializing a field
of empty record (#138594)
Previously, the Static Analyzer initialized empty-type fields with zeroes.
This can cause problems when those fields have no unique addresses. For
example, see https://github.com/llvm/llvm-project/issues/137252.
rdar://146753089
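A minimal layout sketch of the situation (the full reproducer is the test
added below; the names here are made up):

```cpp
struct Empty {};

struct Holder {
  [[no_unique_address]] int *ptr;  // real data
  [[no_unique_address]] Empty tag; // empty; on the Itanium C++ ABI this may
                                   // share its offset with ptr, so writing
                                   // zeroes "into tag" must not clobber ptr
};
```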
---
.../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 7 ++-
clang/test/Analysis/issue-137252.cpp | 50 +++++++++++++++++++
2 files changed, 56 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Analysis/issue-137252.cpp
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 92ce3fa2225c8..ff07402a29bba 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/AST/ASTContext.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ParentMap.h"
@@ -715,7 +716,11 @@ void ExprEngine::handleConstructor(const Expr *E,
// actually make things worse. Placement new makes this tricky as well,
// since it's then possible to be initializing one part of a multi-
// dimensional array.
- State = State->bindDefaultZero(Target, LCtx);
+ const CXXRecordDecl *TargetHeldRecord =
+ cast<CXXRecordDecl>(CE->getType()->getAsRecordDecl());
+
+ if (!TargetHeldRecord || !TargetHeldRecord->isEmpty())
+ State = State->bindDefaultZero(Target, LCtx);
}
Bldr.generateNode(CE, N, State, /*tag=*/nullptr,
diff --git a/clang/test/Analysis/issue-137252.cpp b/clang/test/Analysis/issue-137252.cpp
new file mode 100644
index 0000000000000..6ca3e20ccbbca
--- /dev/null
+++ b/clang/test/Analysis/issue-137252.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s -DEMPTY_CLASS
+// UNSUPPORTED: system-windows
+// expected-no-diagnostics
+
+// This test reproduces the issue that previously the static analyzer
+// initialized an [[no_unique_address]] empty field to zero,
+// over-writing a non-empty field with the same offset.
+
+namespace std {
+#ifdef EMPTY_CLASS
+
+ struct default_delete {};
+ template <class _Tp, class _Dp = default_delete >
+#else
+ // Class with methods and static members is still empty:
+ template <typename T>
+ class default_delete {
+ T dump();
+ static T x;
+ };
+ template <class _Tp, class _Dp = default_delete<_Tp> >
+#endif
+ class unique_ptr {
+ [[no_unique_address]] _Tp * __ptr_;
+ [[no_unique_address]] _Dp __deleter_;
+
+ public:
+ explicit unique_ptr(_Tp* __p) noexcept
+ : __ptr_(__p),
+ __deleter_() {}
+
+ ~unique_ptr() {
+ delete __ptr_;
+ }
+ };
+}
+
+struct X {};
+
+int main()
+{
+ // Previously a leak falsely reported here. It was because the
+ // Static Analyzer engine simulated the initialization of
+ // `__deleter__` incorrectly. The engine assigned zero to
+ // `__deleter__`--an empty record sharing offset with `__ptr__`.
+ // The assignment over wrote `__ptr__`.
+ std::unique_ptr<X> a(new X());
+ return 0;
+}
>From 9a2d6021878768c67243d65a5ebc98a40db34582 Mon Sep 17 00:00:00 2001
From: Kelvin Li <kkwli at users.noreply.github.com>
Date: Wed, 7 May 2025 15:33:23 -0400
Subject: [PATCH 040/115] [flang][AIX] Predefine __64BIT__ and _AIX macros
(#138591)
---
flang/lib/Frontend/CompilerInvocation.cpp | 17 ++++++++++++-----
.../test/Driver/predefined-macros-powerpc2.f90 | 18 +++++++++++++++---
2 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 28f2f69f23baf..238079a09ef3a 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1615,13 +1615,10 @@ void CompilerInvocation::setDefaultPredefinitions() {
}
llvm::Triple targetTriple{llvm::Triple(this->targetOpts.triple)};
- if (targetTriple.isPPC()) {
- // '__powerpc__' is a generic macro for any PowerPC cases. e.g. Max integer
- // size.
- fortranOptions.predefinitions.emplace_back("__powerpc__", "1");
- }
if (targetTriple.isOSLinux()) {
fortranOptions.predefinitions.emplace_back("__linux__", "1");
+ } else if (targetTriple.isOSAIX()) {
+ fortranOptions.predefinitions.emplace_back("_AIX", "1");
}
switch (targetTriple.getArch()) {
@@ -1631,6 +1628,16 @@ void CompilerInvocation::setDefaultPredefinitions() {
fortranOptions.predefinitions.emplace_back("__x86_64__", "1");
fortranOptions.predefinitions.emplace_back("__x86_64", "1");
break;
+ case llvm::Triple::ArchType::ppc:
+ case llvm::Triple::ArchType::ppc64:
+ case llvm::Triple::ArchType::ppcle:
+ case llvm::Triple::ArchType::ppc64le:
+ // '__powerpc__' is a generic macro for any PowerPC.
+ fortranOptions.predefinitions.emplace_back("__powerpc__", "1");
+ if (targetTriple.isOSAIX() && targetTriple.isArch64Bit()) {
+ fortranOptions.predefinitions.emplace_back("__64BIT__", "1");
+ }
+ break;
}
}
diff --git a/flang/test/Driver/predefined-macros-powerpc2.f90 b/flang/test/Driver/predefined-macros-powerpc2.f90
index 6e10235e21f86..6d235afcf8c3b 100644
--- a/flang/test/Driver/predefined-macros-powerpc2.f90
+++ b/flang/test/Driver/predefined-macros-powerpc2.f90
@@ -1,13 +1,25 @@
! Test predefined macro for PowerPC architecture
-! RUN: %flang_fc1 -triple ppc64le-unknown-linux -cpp -E %s | FileCheck %s
+! RUN: %flang_fc1 -triple ppc64le-unknown-linux -cpp -E %s | FileCheck %s -check-prefix=CHECK-LINUX
+! RUN: %flang_fc1 -triple powerpc-unknown-aix -cpp -E %s | FileCheck %s -check-prefix=CHECK-AIX32
+! RUN: %flang_fc1 -triple powerpc64-unknown-aix -cpp -E %s | FileCheck %s -check-prefix=CHECK-AIX64
! REQUIRES: target=powerpc{{.*}}
-! CHECK: integer :: var1 = 1
-! CHECK: integer :: var2 = 1
+! CHECK-LINUX: integer :: var1 = 1
+! CHECK-LINUX: integer :: var2 = 1
+! CHECK-AIX32: integer :: var1 = 1
+! CHECK-AIX32: integer :: var2 = 1
+! CHECK-AIX32: integer :: var3 = __64BIT__
+! CHECK-AIX64: integer :: var1 = 1
+! CHECK-AIX64: integer :: var2 = 1
+! CHECK-AIX64: integer :: var3 = 1
#if defined(__linux__) && defined(__powerpc__)
integer :: var1 = __powerpc__
integer :: var2 = __linux__
+#elif defined(_AIX) && defined(__powerpc__)
+ integer :: var1 = __powerpc__
+ integer :: var2 = _AIX
+ integer :: var3 = __64BIT__
#endif
end program
>From b7db2e1fc0705807fe7f02b790d1a7c998287bda Mon Sep 17 00:00:00 2001
From: Yanzuo Liu <zwuis at outlook.com>
Date: Thu, 8 May 2025 03:39:31 +0800
Subject: [PATCH 041/115] [Clang-Tidy][NFC] Simplify check
cppcoreguidelines-missing-std-forward (#138504)
Remove `CaptureInCopy` because the cases handled by it are covered by
`CaptureByRefExplicit`.
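For illustration, one case previously matched by `CaptureInCopy` that
`CaptureByRefExplicit` also matches (the names `sink`/`use` are made up):

```cpp
template <typename T>
void sink(T &&t) {
  // By-copy capture default, but `t` is still captured by reference
  // explicitly, so the explicit by-ref matcher already covers it.
  auto fn = [=, &t] { t.use(); };
  fn();
  // `t` is never std::forward<T>(t)'ed, which is what the check diagnoses.
}
```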
---
.../clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp
index bbb35228ce47f..cf299609e646d 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp
@@ -92,19 +92,15 @@ void MissingStdForwardCheck::registerMatchers(MatchFinder *Finder) {
declRefExpr(to(equalsBoundNode("param"))))));
auto RefToParm = capturesVar(
varDecl(anyOf(hasSameNameAsBoundNode("param"), RefToParmImplicit)));
- auto HasRefToParm = hasAnyCapture(RefToParm);
auto CaptureInRef =
allOf(hasCaptureDefaultKind(LambdaCaptureDefault::LCD_ByRef),
unless(hasAnyCapture(
capturesVar(varDecl(hasSameNameAsBoundNode("param"))))));
- auto CaptureInCopy = allOf(
- hasCaptureDefaultKind(LambdaCaptureDefault::LCD_ByCopy), HasRefToParm);
auto CaptureByRefExplicit = hasAnyCapture(
allOf(hasCaptureKind(LambdaCaptureKind::LCK_ByRef), RefToParm));
- auto CapturedInBody =
- lambdaExpr(anyOf(CaptureInRef, CaptureInCopy, CaptureByRefExplicit));
+ auto CapturedInBody = lambdaExpr(anyOf(CaptureInRef, CaptureByRefExplicit));
auto CapturedInCaptureList = hasAnyCapture(capturesVar(
varDecl(hasInitializer(ignoringParenImpCasts(equalsBoundNode("call"))))));
>From 79bc8ad5b73e123cf71bbc6084f2a1bc525eca2b Mon Sep 17 00:00:00 2001
From: jeremyd2019 <github at jdrake.com>
Date: Wed, 7 May 2025 12:39:36 -0700
Subject: [PATCH 042/115] [LLVM][Support] check for error return from dladdr.
(#138369)
In case of an error, the Dl_info struct may have been left
uninitialized, so it is not safe to use its members.
In one error case, initialize dli_sname to nullptr explicitly, so that
the later check against nullptr is guaranteed to be safe.
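The defensive pattern adopted here looks roughly like this (a sketch, not the
Signals.inc code; dladdr is a glibc/BSD extension, so _GNU_SOURCE and -ldl may
be needed on some toolchains):

```cpp
#include <dlfcn.h>
#include <cstdio>

void describe(void *addr) {
  Dl_info info;
  if (dladdr(addr, &info) == 0) {
    std::puts("(error)"); // nothing in `info` is safe to read on failure
    return;
  }
  std::printf("%s in %s\n",
              info.dli_sname ? info.dli_sname : "<unknown symbol>",
              info.dli_fname ? info.dli_fname : "<unknown object>");
}
```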
---
llvm/lib/Support/Unix/Signals.inc | 34 +++++++++++++++++++------------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 691e1014f18e8..6668a2953b3b2 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -826,14 +826,17 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
int width = 0;
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
- dladdr(StackTrace[i], &dlinfo);
- const char *name = strrchr(dlinfo.dli_fname, '/');
-
int nwidth;
- if (!name)
- nwidth = strlen(dlinfo.dli_fname);
- else
- nwidth = strlen(name) - 1;
+ if (dladdr(StackTrace[i], &dlinfo) == 0) {
+ nwidth = 7; // "(error)"
+ } else {
+ const char *name = strrchr(dlinfo.dli_fname, '/');
+
+ if (!name)
+ nwidth = strlen(dlinfo.dli_fname);
+ else
+ nwidth = strlen(name) - 1;
+ }
if (nwidth > width)
width = nwidth;
@@ -841,15 +844,20 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
- dladdr(StackTrace[i], &dlinfo);
OS << format("%-2d", i);
- const char *name = strrchr(dlinfo.dli_fname, '/');
- if (!name)
- OS << format(" %-*s", width, static_cast<const char *>(dlinfo.dli_fname));
- else
- OS << format(" %-*s", width, name + 1);
+ if (dladdr(StackTrace[i], &dlinfo) == 0) {
+ OS << format(" %-*s", width, static_cast<const char *>("(error)"));
+ dlinfo.dli_sname = nullptr;
+ } else {
+ const char *name = strrchr(dlinfo.dli_fname, '/');
+ if (!name)
+ OS << format(" %-*s", width,
+ static_cast<const char *>(dlinfo.dli_fname));
+ else
+ OS << format(" %-*s", width, name + 1);
+ }
OS << format(" %#0*lx", (int)(sizeof(void *) * 2) + 2,
(unsigned long)StackTrace[i]);
>From 7f4e36ebf61257c15c5bf5e80b08a2c9402b245d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 7 May 2025 20:47:36 +0100
Subject: [PATCH 043/115] [VPlan] Create PHI VPInstruction using VPBuilder
(NFC).
Use builder to create scalar PHI VPInstructions.
---
.../Transforms/Vectorize/LoopVectorizationPlanner.h | 6 ++++++
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 13 ++++++-------
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 981ff7fc2364d..1b06c8b6ee3bd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -249,6 +249,12 @@ class VPBuilder {
new VPInstruction(Ptr, Offset, GEPNoWrapFlags::inBounds(), DL, Name));
}
+ VPInstruction *createScalarPhi(ArrayRef<VPValue *> IncomingValues,
+ DebugLoc DL, const Twine &Name = "") {
+ return tryInsertInstruction(
+ new VPInstruction(Instruction::PHI, IncomingValues, DL, Name));
+ }
+
/// Convert the input value \p Current to the corresponding value of an
/// induction with \p Start and \p Step values, using \p Start + \p Current *
/// \p Step.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7093d378d8c3e..eba8b16bf288d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2093,17 +2093,16 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
VPValue *MaxEVL = &Plan.getVF();
// Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
+ VPBuilder Builder(LoopRegion->getPreheaderVPBB());
if (unsigned VFSize =
TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits();
VFSize != 32) {
- VPBuilder Builder(LoopRegion->getPreheaderVPBB());
MaxEVL = Builder.createScalarCast(
VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL,
Type::getInt32Ty(Ctx), DebugLoc());
}
- PrevEVL = new VPInstruction(Instruction::PHI, {MaxEVL, &EVL}, DebugLoc(),
- "prev.evl");
- PrevEVL->insertBefore(*Header, Header->getFirstNonPhi());
+ Builder.setInsertPoint(Header, Header->getFirstNonPhi());
+ PrevEVL = Builder.createScalarPhi({MaxEVL, &EVL}, DebugLoc(), "prev.evl");
}
for (VPUser *U : to_vector(Plan.getVF().users())) {
@@ -2433,10 +2432,10 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
StringRef Name =
isa<VPCanonicalIVPHIRecipe>(PhiR) ? "index" : "evl.based.iv";
- auto *ScalarR = new VPInstruction(
- Instruction::PHI, {PhiR->getStartValue(), PhiR->getBackedgeValue()},
+ VPBuilder Builder(PhiR);
+ auto *ScalarR = Builder.createScalarPhi(
+ {PhiR->getStartValue(), PhiR->getBackedgeValue()},
PhiR->getDebugLoc(), Name);
- ScalarR->insertBefore(PhiR);
PhiR->replaceAllUsesWith(ScalarR);
ToRemove.push_back(PhiR);
continue;
>From 9048c2d4f239cb47fed17cb150e2bbf3934454c2 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Wed, 7 May 2025 21:52:21 +0200
Subject: [PATCH 044/115] Revert "[analyzer] Make it a noop when initializing a
field of empty record" (#138951)
Reverts llvm/llvm-project#138594
Crashes, see: https://lab.llvm.org/buildbot/#/builders/144/builds/24534
---
.../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 7 +--
clang/test/Analysis/issue-137252.cpp | 50 -------------------
2 files changed, 1 insertion(+), 56 deletions(-)
delete mode 100644 clang/test/Analysis/issue-137252.cpp
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index ff07402a29bba..92ce3fa2225c8 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/AST/ASTContext.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ParentMap.h"
@@ -716,11 +715,7 @@ void ExprEngine::handleConstructor(const Expr *E,
// actually make things worse. Placement new makes this tricky as well,
// since it's then possible to be initializing one part of a multi-
// dimensional array.
- const CXXRecordDecl *TargetHeldRecord =
- cast<CXXRecordDecl>(CE->getType()->getAsRecordDecl());
-
- if (!TargetHeldRecord || !TargetHeldRecord->isEmpty())
- State = State->bindDefaultZero(Target, LCtx);
+ State = State->bindDefaultZero(Target, LCtx);
}
Bldr.generateNode(CE, N, State, /*tag=*/nullptr,
diff --git a/clang/test/Analysis/issue-137252.cpp b/clang/test/Analysis/issue-137252.cpp
deleted file mode 100644
index 6ca3e20ccbbca..0000000000000
--- a/clang/test/Analysis/issue-137252.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s -DEMPTY_CLASS
-// UNSUPPORTED: system-windows
-// expected-no-diagnostics
-
-// This test reproduces the issue that previously the static analyzer
-// initialized an [[no_unique_address]] empty field to zero,
-// over-writing a non-empty field with the same offset.
-
-namespace std {
-#ifdef EMPTY_CLASS
-
- struct default_delete {};
- template <class _Tp, class _Dp = default_delete >
-#else
- // Class with methods and static members is still empty:
- template <typename T>
- class default_delete {
- T dump();
- static T x;
- };
- template <class _Tp, class _Dp = default_delete<_Tp> >
-#endif
- class unique_ptr {
- [[no_unique_address]] _Tp * __ptr_;
- [[no_unique_address]] _Dp __deleter_;
-
- public:
- explicit unique_ptr(_Tp* __p) noexcept
- : __ptr_(__p),
- __deleter_() {}
-
- ~unique_ptr() {
- delete __ptr_;
- }
- };
-}
-
-struct X {};
-
-int main()
-{
- // Previously a leak falsely reported here. It was because the
- // Static Analyzer engine simulated the initialization of
- // `__deleter__` incorrectly. The engine assigned zero to
- // `__deleter__`--an empty record sharing offset with `__ptr__`.
- // The assignment over wrote `__ptr__`.
- std::unique_ptr<X> a(new X());
- return 0;
-}
>From 40941f15962191d0236ecdc29cd6937abce974fb Mon Sep 17 00:00:00 2001
From: Jacob Lalonde <jalalonde at fb.com>
Date: Wed, 7 May 2025 12:53:37 -0700
Subject: [PATCH 045/115] [LLDB][Minidump] Add some buffer directories
(#138943)
Add a generous number of buffer directories. I found that some LLDB forks
(internal and external) had custom ranges that could fail because we
didn't pre-account for those. To prevent this from being a problem, I've
added a large number of buffer directories at the cost of 240 bytes.
---
.../Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
index 38806dfc8e5b5..d2ca5b26c9ec9 100644
--- a/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
+++ b/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp
@@ -75,6 +75,12 @@ Status MinidumpFileBuilder::AddHeaderAndCalculateDirectories() {
}
}
+  // Add a generous buffer of directories; these are quite small, and forks
+  // may add new directories that upstream LLDB had not accounted for when we
+  // started pre-calculating the directory size, so this padding should
+  // account for that.
+ m_expected_directories += 100;
+
m_saved_data_size +=
m_expected_directories * sizeof(llvm::minidump::Directory);
Status error;
>From 1a7cd92c8607bbad5c212f474a1e46043a8016cd Mon Sep 17 00:00:00 2001
From: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
Date: Wed, 7 May 2025 12:56:11 -0700
Subject: [PATCH 046/115] [flang][cuda] Update syncthreads interface (#138023)
---
flang/module/cudadevice.f90 | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 9bd90bcfc30ec..f8a30da8b9615 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -17,9 +17,8 @@ module cudadevice
! Synchronization Functions
- interface
- attributes(device) subroutine syncthreads()
- end subroutine
+ interface syncthreads
+ procedure :: syncthreads
end interface
interface
@@ -1614,4 +1613,9 @@ attributes(device,host) logical function on_device() bind(c)
end function
end interface
+contains
+
+ attributes(device) subroutine syncthreads()
+ end subroutine
+
end module
>From 384a5b00a7c8fffa72f7fe7021863d00da842a19 Mon Sep 17 00:00:00 2001
From: vaibhav <73255802+mrdaybird at users.noreply.github.com>
Date: Thu, 8 May 2025 01:32:21 +0530
Subject: [PATCH 047/115] [LAA] Use MaxStride instead of CommonStride to
calculate MaxVF (#98142)
Previously, we bailed out of the MaxVF calculation if the strides were not the
same and instead depended on runtime checks, which are not yet implemented. We
can instead use MaxStride to compute a conservative upper bound. This handles
cases like the following:
```c
#define LEN 256 * 256
float a[LEN];
void gather() {
for (int i = 0; i < LEN - 1024 - 255; i++) {
#pragma clang loop interleave(disable)
#pragma clang loop unroll(disable)
for (int j = 0; j < 256; j++)
a[i + j + 1024] += a[j * 4 + i];
}
}
```
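To make the new bound concrete, a small worked example with assumed numbers
(mirroring the loop above, where the load strides by 16 bytes and the store by
4 bytes per iteration; MinNumIter and the element size are assumptions):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t SrcStride = 16;   // bytes, a[j * 4 + i] with 4-byte floats
  const uint64_t SinkStride = 4;   // bytes, a[i + j + 1024]
  const uint64_t MinNumIter = 2;   // assumed minimum number of iterations
  const uint64_t TypeByteSize = 4; // sizeof(float)

  // The patch uses the larger of the two strides as a conservative bound.
  const uint64_t MaxStride = std::max(SrcStride, SinkStride);
  const uint64_t MinDistanceNeeded =
      MaxStride * (MinNumIter - 1) + TypeByteSize;
  std::printf("MinDistanceNeeded = %llu bytes\n",
              (unsigned long long)MinDistanceNeeded);
  return 0;
}
```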
---------
Co-authored-by: Florian Hahn <flo at fhahn.com>
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 13 +-
.../LoopAccessAnalysis/different_strides.ll | 156 ++++++++++++++++++
2 files changed, 162 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 7ec9bdb318ffb..f222a9905c3bb 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2148,10 +2148,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
"different type sizes\n");
return Dependence::Unknown;
}
-
- if (!CommonStride)
- return Dependence::Unknown;
-
// Bail out early if passed-in parameters make vectorization not feasible.
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
VectorizerParams::VectorizationFactor : 1);
@@ -2162,7 +2158,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// It's not vectorizable if the distance is smaller than the minimum distance
// needed for a vectroized/unrolled version. Vectorizing one iteration in
- // front needs CommonStride. Vectorizing the last iteration needs TypeByteSize
+ // front needs MaxStride. Vectorizing the last iteration needs TypeByteSize.
// (No need to plus the last gap distance).
//
// E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
@@ -2186,11 +2182,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
// the minimum distance needed is 28, which is greater than distance. It is
// not safe to do vectorization.
+ //
+ // We use MaxStride (maximum of src and sink strides) to get a conservative
+ // lower bound on the MinDistanceNeeded in case of different strides.
// We know that Dist is positive, but it may not be constant. Use the signed
// minimum for computations below, as this ensures we compute the closest
// possible dependence distance.
- uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1) + TypeByteSize;
+ uint64_t MinDistanceNeeded = MaxStride * (MinNumIter - 1) + TypeByteSize;
if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
if (!ConstDist) {
// For non-constant distances, we checked the lower bound of the
@@ -2236,7 +2235,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride))
return Dependence::BackwardVectorizableButPreventsForwarding;
- uint64_t MaxVF = MinDepDistBytes / *CommonStride;
+ uint64_t MaxVF = MinDepDistBytes / MaxStride;
LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
<< " with max VF = " << MaxVF << '\n');
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll b/llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll
new file mode 100644
index 0000000000000..c5f31de2f8233
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<access-info>" %s 2>&1 | FileCheck %s
+
+ at a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16
+
+; Generated from the following C code:
+; #define LEN 256 * 256
+; float a[LEN];
+;
+; void different_strides() {
+; for (int i = 0; i < LEN - 1024 - 255; i++) {
+; #pragma clang loop interleave(disable)
+; #pragma clang loop unroll(disable)
+; for (int j = 0; j < 256; j++)
+; a[i + j + 1024] += a[j * 4 + i];
+; }
+; }
+; The load and store have different strides (4 and 16 bytes, respectively), but the store
+; is always at a safe positive distance away from the load, thus BackwardVectorizable.
+define void @different_strides_backward_vectorizable() {
+; CHECK-LABEL: 'different_strides_backward_vectorizable'
+; CHECK-NEXT: inner.body:
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: BackwardVectorizable:
+; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: Report: loop is not the innermost loop
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %outer.header
+
+outer.header:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
+ %0 = add nuw nsw i64 %i, 1024
+ br label %inner.body
+
+inner.body:
+ %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
+ %1 = shl nuw nsw i64 %j, 2
+ %2 = add nuw nsw i64 %1, %i
+ %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = add nuw nsw i64 %0, %j
+ %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
+ %5 = load float, ptr %arrayidx8, align 4
+ %add9 = fadd fast float %5, %3
+ store float %add9, ptr %arrayidx8, align 4
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond.not = icmp eq i64 %j.next, 256
+ br i1 %exitcond.not, label %outer.latch, label %inner.body
+
+outer.latch:
+ %i.next = add nuw nsw i64 %i, 1
+ %outerexitcond.not = icmp eq i64 %i.next, 64257
+ br i1 %outerexitcond.not, label %exit, label %outer.header
+
+exit:
+ ret void
+}
+
+
+; Generated from the following C code:
+; void different_stride_and_not_vectorizable(){
+; for(int i = 0; i < LEN2; i++){
+; for(int j = 0 ; j < LEN; j++){
+; a[i + j + LEN] += a[i + 4*j];
+; }
+; }
+; }
+; The load and store have different strides, but the store and load are not at a
+; safe distance from each other, so the loop is not safe to vectorize.
+define void @different_stride_and_not_vectorizable() {
+; CHECK-LABEL: 'different_stride_and_not_vectorizable'
+; CHECK-NEXT: inner.body:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
+; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: outer.header:
+; CHECK-NEXT: Report: loop is not the innermost loop
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ br label %outer.header
+
+outer.header:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
+ %0 = add nuw nsw i64 %i, 256
+ br label %inner.body
+
+inner.body:
+ %j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
+ %1 = shl nuw nsw i64 %j, 2
+ %2 = add nuw nsw i64 %1, %i
+ %arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
+ %3 = load float, ptr %arrayidx, align 4
+ %4 = add nuw nsw i64 %0, %j
+ %arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
+ %5 = load float, ptr %arrayidx8, align 4
+ %add9 = fadd fast float %5, %3
+ store float %add9, ptr %arrayidx8, align 4
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond.not = icmp eq i64 %j.next, 256
+ br i1 %exitcond.not, label %outer.latch, label %inner.body
+
+outer.latch:
+ %i.next = add nuw nsw i64 %i, 1
+ %exitcond29.not = icmp eq i64 %i.next, 65536
+ br i1 %exitcond29.not, label %exit, label %outer.header
+
+exit:
+ ret void
+}
>From 9da103ab9e656274357a1f09456431dac84ac549 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 7 May 2025 21:02:44 +0100
Subject: [PATCH 048/115] [LAA] Update remaining tests after 384a5b00a7.
---
...ent-strides-safe-dep-due-to-backedge-taken-count.ll | 9 ++++-----
.../non-constant-strides-backward.ll | 10 ++++------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
index 8c7df4bdf5a5a..0d1b0829c09da 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll
@@ -106,13 +106,12 @@ exit:
ret void
}
-define void @unknown_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) {
-; CHECK-LABEL: 'unknown_dep_not_known_safe_due_to_backedge_taken_count'
+define void @backward_dep_known_distance_less_than_btc(ptr %A) {
+; CHECK-LABEL: 'backward_dep_known_distance_less_than_btc'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 8160 bits
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
index 416742a94e0d3..d263749ea1f46 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
@@ -45,10 +45,9 @@ exit:
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 4096 bits
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
; CHECK-EMPTY:
@@ -83,10 +82,9 @@ exit:
define void @different_non_constant_strides_known_backward_min_distance_16(ptr %A) {
; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_16'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
-; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Unknown:
+; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
; CHECK-EMPTY:
>From 43c514bd42d3e12dd299c0a7165b3e079e9efd38 Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot at gmail.com>
Date: Wed, 7 May 2025 22:23:41 +0200
Subject: [PATCH 049/115] [Clang] Deprecate `__is_trivially_relocatable`
(#138835)
The C++26 standard trivially-relocatable type trait has slightly different
semantics, so we introduced a new
``__builtin_is_cpp_trivially_relocatable`` when implementing trivial
relocation in #127636.
However, having multiple relocatable traits would be confusing in the
long run, so we deprecate the old trait.
As discussed in #127636,
`__builtin_is_cpp_trivially_relocatable` should be used instead.
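A minimal before/after sketch (illustrative only; the struct name is made up,
and the exact diagnostics are exercised by the tests below):

```cpp
// A trivially copyable aggregate is reported relocatable by both spellings,
// but the old spelling now emits a deprecation warning.
struct Widget { int x; };

static_assert(__builtin_is_cpp_trivially_relocatable(Widget), ""); // preferred
static_assert(__is_trivially_relocatable(Widget), "");             // warns: deprecated
```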
---------
Co-authored-by: Aaron Ballman <aaron at aaronballman.com>
---
clang/docs/LanguageExtensions.rst | 8 +-
clang/docs/ReleaseNotes.rst | 9 ++
clang/include/clang/Basic/TokenKinds.def | 6 +-
clang/lib/Sema/SemaExprCXX.cpp | 3 +
clang/test/SemaCXX/attr-trivial-abi.cpp | 116 +++++++++++++------
clang/test/SemaCXX/ptrauth-triviality.cpp | 35 ++++--
clang/test/SemaCXX/type-traits-nonobject.cpp | 16 ++-
7 files changed, 140 insertions(+), 53 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index ebcad44197ce4..f56f2a640bb36 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1859,12 +1859,18 @@ The following type trait primitives are supported by Clang. Those traits marked
* ``__is_trivially_constructible`` (C++, GNU, Microsoft)
* ``__is_trivially_copyable`` (C++, GNU, Microsoft)
* ``__is_trivially_destructible`` (C++, MSVC 2013)
-* ``__is_trivially_relocatable`` (Clang): Returns true if moving an object
+* ``__is_trivially_relocatable`` (Clang) (Deprecated,
+ use ``__builtin_is_cpp_trivially_relocatable`` instead).
+ Returns true if moving an object
of the given type, and then destroying the source object, is known to be
functionally equivalent to copying the underlying bytes and then dropping the
source object on the floor. This is true of trivial types,
C++26 relocatable types, and types which
were made trivially relocatable via the ``clang::trivial_abi`` attribute.
+ This trait is deprecated and should be replaced by
+ ``__builtin_is_cpp_trivially_relocatable``. Note however that it is generally
+ unsafe to relocate a C++-relocatable type with ``memcpy`` or ``memmove``;
+ use ``__builtin_trivially_relocate``.
* ``__builtin_is_cpp_trivially_relocatable`` (C++): Returns true if an object
is trivially relocatable, as defined by the C++26 standard [meta.unary.prop].
Note that when relocating the caller code should ensure that if the object is polymorphic,
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 89d7f137d0fe0..9c4ab80537ac9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -574,6 +574,15 @@ Bug Fixes to Compiler Builtins
- ``__has_unique_object_representations(Incomplete[])`` is no longer accepted, per
`LWG4113 <https://cplusplus.github.io/LWG/issue4113>`_.
+- ``__builtin_is_cpp_trivially_relocatable``, ``__builtin_is_replaceable`` and
+ ``__builtin_trivially_relocate`` have been added to support standard C++26 relocation.
+
+- ``__is_trivially_relocatable`` has been deprecated, and uses should be replaced by
+ ``__builtin_is_cpp_trivially_relocatable``.
+  Note that it is generally unsafe to ``memcpy`` non-trivially copyable types that
+ are ``__builtin_is_cpp_trivially_relocatable``. It is recommended to use
+ ``__builtin_trivially_relocate`` instead.
+
Bug Fixes to Attribute Support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- Fixed crash when a parameter to the ``clang::annotate`` attribute evaluates to ``void``. See #GH119125
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 9bc63689d1363..94e72fea56a68 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -544,7 +544,6 @@ TYPE_TRAIT_2(__is_pointer_interconvertible_base_of, IsPointerInterconvertibleBas
#include "clang/Basic/TransformTypeTraits.def"
// Clang-only C++ Type Traits
-TYPE_TRAIT_1(__is_trivially_relocatable, IsTriviallyRelocatable, KEYCXX)
TYPE_TRAIT_1(__is_trivially_equality_comparable, IsTriviallyEqualityComparable, KEYCXX)
TYPE_TRAIT_1(__is_bounded_array, IsBoundedArray, KEYCXX)
TYPE_TRAIT_1(__is_unbounded_array, IsUnboundedArray, KEYCXX)
@@ -556,8 +555,11 @@ TYPE_TRAIT_2(__reference_converts_from_temporary, ReferenceConvertsFromTemporary
// IsDeducible is only used internally by clang for CTAD implementation and
// is not exposed to users.
TYPE_TRAIT_2(/*EmptySpellingName*/, IsDeducible, KEYCXX)
-TYPE_TRAIT_1(__is_bitwise_cloneable, IsBitwiseCloneable, KEYALL)
+
+// __is_trivially_relocatable is deprecated
TYPE_TRAIT_1(__builtin_is_cpp_trivially_relocatable, IsCppTriviallyRelocatable, KEYCXX)
+TYPE_TRAIT_1(__is_trivially_relocatable, IsTriviallyRelocatable, KEYCXX)
+TYPE_TRAIT_1(__is_bitwise_cloneable, IsBitwiseCloneable, KEYALL)
TYPE_TRAIT_1(__builtin_is_replaceable, IsReplaceable, KEYCXX)
TYPE_TRAIT_1(__builtin_structured_binding_size, StructuredBindingSize, KEYCXX)
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 8bdc2300b0392..b2a982e953012 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -6449,6 +6449,9 @@ void DiagnoseBuiltinDeprecation(Sema& S, TypeTrait Kind,
case UTT_HasTrivialDestructor:
Replacement = UTT_IsTriviallyDestructible;
break;
+ case UTT_IsTriviallyRelocatable:
+ Replacement = clang::UTT_IsCppTriviallyRelocatable;
+ break;
default:
return;
}
diff --git a/clang/test/SemaCXX/attr-trivial-abi.cpp b/clang/test/SemaCXX/attr-trivial-abi.cpp
index e018ccda2d8d9..333ab34bc5d51 100644
--- a/clang/test/SemaCXX/attr-trivial-abi.cpp
+++ b/clang/test/SemaCXX/attr-trivial-abi.cpp
@@ -1,4 +1,6 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++11
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-windows-msvc -std=c++11
+
void __attribute__((trivial_abi)) foo(); // expected-warning {{'trivial_abi' attribute only applies to classes}}
@@ -10,30 +12,38 @@ class __attribute__((trivial_abi)) a { a(a &&); };
// (And it is only trivially relocatable, currently, if it is trivial for calls.)
// In this case, it is suppressed by an explicitly defined move constructor.
// Similar concerns apply to later tests that have #if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(a<int>), "");
+static_assert(!__is_trivially_relocatable(a<int>), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(a<int>), "");
#else
-static_assert(__is_trivially_relocatable(a<int>), "");
+static_assert(__is_trivially_relocatable(a<int>), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(a<int>), "");
#endif
struct [[clang::trivial_abi]] S0 {
int a;
};
-static_assert(__is_trivially_relocatable(S0), "");
+static_assert(__is_trivially_relocatable(S0), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S0), "");
struct __attribute__((trivial_abi)) S1 {
int a;
};
-static_assert(__is_trivially_relocatable(S1), "");
+static_assert(__is_trivially_relocatable(S1), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S1), "");
+
struct __attribute__((trivial_abi)) S3 { // expected-warning {{'trivial_abi' cannot be applied to 'S3'}} expected-note {{is polymorphic}}
virtual void m();
};
-static_assert(!__is_trivially_relocatable(S3), "");
+static_assert(!__is_trivially_relocatable(S3), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S3), "");
+
struct S3_2 {
virtual void m();
} __attribute__((trivial_abi)); // expected-warning {{'trivial_abi' cannot be applied to 'S3_2'}} expected-note {{is polymorphic}}
-static_assert(!__is_trivially_relocatable(S3_2), "");
+static_assert(!__is_trivially_relocatable(S3_2), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S3_2), "");
struct __attribute__((trivial_abi)) S3_3 { // expected-warning {{'trivial_abi' cannot be applied to 'S3_3'}} expected-note {{has a field of a non-trivial class type}}
S3_3(S3_3 &&);
@@ -43,9 +53,13 @@ struct __attribute__((trivial_abi)) S3_3 { // expected-warning {{'trivial_abi' c
// The ClangABI4OrPS4 calling convention kind passes classes in registers if the
// copy constructor is trivial for calls *or deleted*, while other platforms do
// not accept deleted constructors.
-static_assert(__is_trivially_relocatable(S3_3), "");
+static_assert(__is_trivially_relocatable(S3_3), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S3_3), "");
+
#else
-static_assert(!__is_trivially_relocatable(S3_3), "");
+static_assert(!__is_trivially_relocatable(S3_3), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S3_3), "");
+
#endif
// Diagnose invalid trivial_abi even when the type is templated because it has a non-trivial field.
@@ -54,20 +68,28 @@ struct __attribute__((trivial_abi)) S3_4 { // expected-warning {{'trivial_abi' c
S3_4(S3_4 &&);
S3_2 s32;
};
-static_assert(!__is_trivially_relocatable(S3_4<int>), "");
+static_assert(!__is_trivially_relocatable(S3_4<int>), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S3_4<int>), "");
+
struct S4 {
int a;
};
-static_assert(__is_trivially_relocatable(S4), "");
+static_assert(__is_trivially_relocatable(S4), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S4), "");
+
struct __attribute__((trivial_abi)) S5 : public virtual S4 { // expected-warning {{'trivial_abi' cannot be applied to 'S5'}} expected-note {{has a virtual base}}
};
-static_assert(!__is_trivially_relocatable(S5), "");
+static_assert(!__is_trivially_relocatable(S5), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S5), "");
+
struct __attribute__((trivial_abi)) S9 : public S4 {
};
-static_assert(__is_trivially_relocatable(S9), "");
+static_assert(__is_trivially_relocatable(S9), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S9), "");
+
struct __attribute__((trivial_abi(1))) S8 { // expected-error {{'trivial_abi' attribute takes no arguments}}
int a;
@@ -80,8 +102,12 @@ struct __attribute__((trivial_abi)) S10 {
};
S10<int *> p1;
-static_assert(__is_trivially_relocatable(S10<int>), "");
-static_assert(__is_trivially_relocatable(S10<S3>), "");
+static_assert(__is_trivially_relocatable(S10<int>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S10<int>), "");
+
+static_assert(__is_trivially_relocatable(S10<S3>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S10<S3>), "");
+
template <class T>
struct S14 {
@@ -93,15 +119,21 @@ struct __attribute__((trivial_abi)) S15 : S14<T> {
};
S15<int> s15;
-static_assert(__is_trivially_relocatable(S15<int>), "");
-static_assert(__is_trivially_relocatable(S15<S3>), "");
+static_assert(__is_trivially_relocatable(S15<int>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S15<int>), "");
+
+static_assert(__is_trivially_relocatable(S15<S3>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S15<S3>), "");
template <class T>
struct __attribute__((trivial_abi)) S16 {
S14<T> a;
};
-static_assert(__is_trivially_relocatable(S16<int>), "");
-static_assert(__is_trivially_relocatable(S16<S3>), "");
+static_assert(__is_trivially_relocatable(S16<int>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S16<int>), "");
+
+static_assert(__is_trivially_relocatable(S16<S3>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S16<S3>), "");
S16<int> s16;
@@ -110,8 +142,12 @@ struct __attribute__((trivial_abi)) S17 {
};
S17<int> s17;
-static_assert(__is_trivially_relocatable(S17<int>), "");
-static_assert(__is_trivially_relocatable(S17<S3>), "");
+static_assert(__is_trivially_relocatable(S17<int>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S17<int>), "");
+
+static_assert(__is_trivially_relocatable(S17<S3>), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S17<S3>), "");
+
namespace deletedCopyMoveConstructor {
struct __attribute__((trivial_abi)) CopyMoveDeleted { // expected-warning {{'trivial_abi' cannot be applied to 'CopyMoveDeleted'}} expected-note {{copy constructors and move constructors are all deleted}}
@@ -119,18 +155,24 @@ struct __attribute__((trivial_abi)) CopyMoveDeleted { // expected-warning {{'tri
CopyMoveDeleted(CopyMoveDeleted &&) = delete;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(CopyMoveDeleted), "");
+static_assert(__is_trivially_relocatable(CopyMoveDeleted), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(CopyMoveDeleted), "");
+
#else
-static_assert(!__is_trivially_relocatable(CopyMoveDeleted), "");
+static_assert(!__is_trivially_relocatable(CopyMoveDeleted), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(CopyMoveDeleted), "");
+
#endif
struct __attribute__((trivial_abi)) S18 { // expected-warning {{'trivial_abi' cannot be applied to 'S18'}} expected-note {{copy constructors and move constructors are all deleted}}
CopyMoveDeleted a;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(S18), "");
+static_assert(__is_trivially_relocatable(S18), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S18), "");
#else
-static_assert(!__is_trivially_relocatable(S18), "");
+static_assert(!__is_trivially_relocatable(S18), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S18), "");
#endif
struct __attribute__((trivial_abi)) CopyDeleted {
@@ -138,25 +180,29 @@ struct __attribute__((trivial_abi)) CopyDeleted {
CopyDeleted(CopyDeleted &&) = default;
};
#if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(CopyDeleted), "");
+static_assert(!__is_trivially_relocatable(CopyDeleted), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(CopyDeleted), "");
+
#else
-static_assert(__is_trivially_relocatable(CopyDeleted), "");
+static_assert(__is_trivially_relocatable(CopyDeleted), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(CopyDeleted), "");
#endif
struct __attribute__((trivial_abi)) MoveDeleted {
MoveDeleted(const MoveDeleted &) = default;
MoveDeleted(MoveDeleted &&) = delete;
};
-static_assert(__is_trivially_relocatable(MoveDeleted), "");
-
+static_assert(__is_trivially_relocatable(MoveDeleted), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(MoveDeleted), "");
struct __attribute__((trivial_abi)) S19 { // expected-warning {{'trivial_abi' cannot be applied to 'S19'}} expected-note {{copy constructors and move constructors are all deleted}}
CopyDeleted a;
MoveDeleted b;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(S19), "");
-#else
-static_assert(!__is_trivially_relocatable(S19), "");
+static_assert(__is_trivially_relocatable(S19), ""); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S19), "");
+static_assert(!__is_trivially_relocatable(S19), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S19), "");
#endif
// This is fine since the move constructor isn't deleted.
@@ -164,8 +210,12 @@ struct __attribute__((trivial_abi)) S20 {
int &&a; // a member of rvalue reference type deletes the copy constructor.
};
#if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(S20), "");
+static_assert(!__is_trivially_relocatable(S20), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S20), "");
+
#else
-static_assert(__is_trivially_relocatable(S20), "");
+static_assert(__is_trivially_relocatable(S20), ""); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S20), "");
+
#endif
} // namespace deletedCopyMoveConstructor
diff --git a/clang/test/SemaCXX/ptrauth-triviality.cpp b/clang/test/SemaCXX/ptrauth-triviality.cpp
index ce6e1a7646558..785e83aaaa545 100644
--- a/clang/test/SemaCXX/ptrauth-triviality.cpp
+++ b/clang/test/SemaCXX/ptrauth-triviality.cpp
@@ -1,6 +1,5 @@
// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++20 -fptrauth-calls -fptrauth-intrinsics -verify -fsyntax-only %s
// RUN: %clang_cc1 -triple aarch64-linux-gnu -std=c++20 -fptrauth-calls -fptrauth-intrinsics -verify -fsyntax-only %s
-// expected-no-diagnostics
#define AQ __ptrauth(1,1,50)
#define IQ __ptrauth(1,0,50)
@@ -24,7 +23,8 @@ static_assert(!__is_trivially_constructible(S1, const S1&));
static_assert(!__is_trivially_assignable(S1, const S1&));
static_assert(__is_trivially_destructible(S1));
static_assert(!__is_trivially_copyable(S1));
-static_assert(!__is_trivially_relocatable(S1));
+static_assert(!__is_trivially_relocatable(S1)); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(S1));
static_assert(!__is_trivially_equality_comparable(S1));
static_assert(__is_trivially_constructible(Holder<S1>));
@@ -32,7 +32,8 @@ static_assert(!__is_trivially_constructible(Holder<S1>, const Holder<S1>&));
static_assert(!__is_trivially_assignable(Holder<S1>, const Holder<S1>&));
static_assert(__is_trivially_destructible(Holder<S1>));
static_assert(!__is_trivially_copyable(Holder<S1>));
-static_assert(!__is_trivially_relocatable(Holder<S1>));
+static_assert(!__is_trivially_relocatable(Holder<S1>)); // expected-warning{{deprecated}}
+static_assert(!__builtin_is_cpp_trivially_relocatable(Holder<S1>));
static_assert(!__is_trivially_equality_comparable(Holder<S1>));
struct S2 {
@@ -45,7 +46,8 @@ static_assert(__is_trivially_constructible(S2, const S2&));
static_assert(__is_trivially_assignable(S2, const S2&));
static_assert(__is_trivially_destructible(S2));
static_assert(__is_trivially_copyable(S2));
-static_assert(__is_trivially_relocatable(S2));
+static_assert(__is_trivially_relocatable(S2)); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(S2));
static_assert(__is_trivially_equality_comparable(S2));
static_assert(__is_trivially_constructible(Holder<S2>));
@@ -53,7 +55,8 @@ static_assert(__is_trivially_constructible(Holder<S2>, const Holder<S2>&));
static_assert(__is_trivially_assignable(Holder<S2>, const Holder<S2>&));
static_assert(__is_trivially_destructible(Holder<S2>));
static_assert(__is_trivially_copyable(Holder<S2>));
-static_assert(__is_trivially_relocatable(Holder<S2>));
+static_assert(__is_trivially_relocatable(Holder<S2>)); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S2>));
static_assert(__is_trivially_equality_comparable(Holder<S2>));
struct AA S3 {
@@ -67,15 +70,19 @@ static_assert(!__is_trivially_constructible(S3, const S3&));
static_assert(!__is_trivially_assignable(S3, const S3&));
static_assert(__is_trivially_destructible(S3));
static_assert(!__is_trivially_copyable(S3));
-static_assert(!__is_trivially_relocatable(S3));
+static_assert(!__is_trivially_relocatable(S3)); // expected-warning{{deprecated}}
+//FIXME
+static_assert(__builtin_is_cpp_trivially_relocatable(S3));
static_assert(!__is_trivially_equality_comparable(S3));
+
static_assert(!__is_trivially_constructible(Holder<S3>));
static_assert(!__is_trivially_constructible(Holder<S3>, const Holder<S3>&));
static_assert(!__is_trivially_assignable(Holder<S3>, const Holder<S3>&));
static_assert(__is_trivially_destructible(Holder<S3>));
static_assert(!__is_trivially_copyable(Holder<S3>));
-static_assert(__is_trivially_relocatable(Holder<S3>));
+static_assert(__is_trivially_relocatable(Holder<S3>)); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S3>));
static_assert(!__is_trivially_equality_comparable(Holder<S3>));
struct IA S4 {
@@ -89,7 +96,9 @@ static_assert(!__is_trivially_constructible(S4, const S4&));
static_assert(!__is_trivially_assignable(S4, const S4&));
static_assert(__is_trivially_destructible(S4));
static_assert(!__is_trivially_copyable(S4));
-static_assert(!__is_trivially_relocatable(S4));
+static_assert(!__is_trivially_relocatable(S4)); // expected-warning{{deprecated}}
+//FIXME
+static_assert(__builtin_is_cpp_trivially_relocatable(S4));
static_assert(!__is_trivially_equality_comparable(S4));
static_assert(!__is_trivially_constructible(Holder<S4>));
@@ -97,7 +106,8 @@ static_assert(!__is_trivially_constructible(Holder<S4>, const Holder<S4>&));
static_assert(!__is_trivially_assignable(Holder<S4>, const Holder<S4>&));
static_assert(__is_trivially_destructible(Holder<S4>));
static_assert(!__is_trivially_copyable(Holder<S4>));
-static_assert(__is_trivially_relocatable(Holder<S4>));
+static_assert(__is_trivially_relocatable(Holder<S4>)); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S4>));
static_assert(!__is_trivially_equality_comparable(Holder<S4>));
struct PA S5 {
@@ -111,7 +121,9 @@ static_assert(!__is_trivially_constructible(S5, const S5&));
static_assert(!__is_trivially_assignable(S5, const S5&));
static_assert(__is_trivially_destructible(S5));
static_assert(!__is_trivially_copyable(S5));
-static_assert(!__is_trivially_relocatable(S5));
+static_assert(!__is_trivially_relocatable(S5)); // expected-warning{{deprecated}}
+//FIXME
+static_assert(__builtin_is_cpp_trivially_relocatable(S5));
static_assert(!__is_trivially_equality_comparable(S5));
static_assert(!__is_trivially_constructible(Holder<S5>));
@@ -119,5 +131,6 @@ static_assert(!__is_trivially_constructible(Holder<S5>, const Holder<S5>&));
static_assert(!__is_trivially_assignable(Holder<S5>, const Holder<S5>&));
static_assert(__is_trivially_destructible(Holder<S5>));
static_assert(!__is_trivially_copyable(Holder<S5>));
-static_assert(__is_trivially_relocatable(Holder<S5>));
+static_assert(__is_trivially_relocatable(Holder<S5>)); // expected-warning{{deprecated}}
+static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S5>));
static_assert(!__is_trivially_equality_comparable(Holder<S5>));
diff --git a/clang/test/SemaCXX/type-traits-nonobject.cpp b/clang/test/SemaCXX/type-traits-nonobject.cpp
index 5f7c20cc2e11c..1763d735547b9 100644
--- a/clang/test/SemaCXX/type-traits-nonobject.cpp
+++ b/clang/test/SemaCXX/type-traits-nonobject.cpp
@@ -1,8 +1,6 @@
// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s
-// expected-no-diagnostics
-
static_assert(!__is_pod(void), "");
static_assert(!__is_pod(int&), "");
static_assert(!__is_pod(int()), "");
@@ -13,7 +11,13 @@ static_assert(!__is_trivially_copyable(int&), "");
static_assert(!__is_trivially_copyable(int()), "");
static_assert(!__is_trivially_copyable(int()&), "");
-static_assert(!__is_trivially_relocatable(void), "");
-static_assert(!__is_trivially_relocatable(int&), "");
-static_assert(!__is_trivially_relocatable(int()), "");
-static_assert(!__is_trivially_relocatable(int()&), "");
+static_assert(!__is_trivially_relocatable(void), ""); // expected-warning{{deprecated}}
+static_assert(!__is_trivially_relocatable(int&), ""); // expected-warning{{deprecated}}
+static_assert(!__is_trivially_relocatable(int()), ""); // expected-warning{{deprecated}}
+static_assert(!__is_trivially_relocatable(int()&), ""); // expected-warning{{deprecated}}
+
+
+static_assert(!__builtin_is_cpp_trivially_relocatable(void), "");
+static_assert(!__builtin_is_cpp_trivially_relocatable(int&), "");
+static_assert(!__builtin_is_cpp_trivially_relocatable(int()), "");
+static_assert(!__builtin_is_cpp_trivially_relocatable(int()&), "");
>From 62a2f0fdc74f086c875833eefe9099bd30f41957 Mon Sep 17 00:00:00 2001
From: fahadnayyar <30953967+fahadnayyar at users.noreply.github.com>
Date: Wed, 7 May 2025 13:42:39 -0700
Subject: [PATCH 050/115] [APINotes] Add support for
 SWIFT_RETURNED_AS_UNRETAINED_BY_DEFAULT (#138699)
This patch adds support in APINotes for annotating C++ user-defined
types with: `swift_attr("returned_as_unretained_by_default")`
This attribute allows specifying a default ownership convention for
return values of `SWIFT_SHARED_REFERENCE` C++ types. Specifically, it
marks all unannotated return values of this type as `unretained` (`+0`)
by default, unless explicitly overridden at the API level using
`swift_attr("returns_retained")` or `swift_attr("returns_unretained")`.
The corresponding Swift compiler support for this annotation enables
developers to suppress warnings about unannotated return ownership in
large codebases while maintaining safe and predictable ownership
semantics. By enabling this in APINotes, library authors can define this
behavior externally without needing to modify C++ source headers
directly.
### Example usage in APINotes:
```
- Name: RefCountedTypeWithDefaultConvention
SwiftImportAs: reference
SwiftDefaultOwnership: unretained
```
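For reference, the entry corresponds roughly to the following inline
annotations (a hypothetical header sketch; the attribute strings match the AST
dump checked in the test below, and the point of APINotes is that the header
does not need to carry them):

```cpp
// Hypothetical inline equivalent of the APINotes entry above.
struct __attribute__((swift_attr("import_reference"),
                      swift_attr("retain:retain"),
                      swift_attr("release:release"),
                      swift_attr("returned_as_unretained_by_default")))
RefCountedTypeWithDefaultConvention {};

void retain(RefCountedTypeWithDefaultConvention *);
void release(RefCountedTypeWithDefaultConvention *);
```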
rdar://150764491
---
clang/include/clang/APINotes/Types.h | 4 ++++
clang/lib/APINotes/APINotesFormat.h | 2 +-
clang/lib/APINotes/APINotesReader.cpp | 7 +++++++
clang/lib/APINotes/APINotesWriter.cpp | 7 +++++++
clang/lib/APINotes/APINotesYAMLCompiler.cpp | 4 ++++
clang/lib/Sema/SemaAPINotes.cpp | 3 +++
clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes | 5 +++++
clang/test/APINotes/Inputs/Headers/SwiftImportAs.h | 4 ++++
clang/test/APINotes/swift-import-as.cpp | 8 ++++++++
9 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h
index 9c01978fd49ef..0f2e49613b514 100644
--- a/clang/include/clang/APINotes/Types.h
+++ b/clang/include/clang/APINotes/Types.h
@@ -737,6 +737,7 @@ class TagInfo : public CommonTypeInfo {
std::optional<std::string> SwiftImportAs;
std::optional<std::string> SwiftRetainOp;
std::optional<std::string> SwiftReleaseOp;
+ std::optional<std::string> SwiftDefaultOwnership;
/// The Swift protocol that this type should be automatically conformed to.
std::optional<std::string> SwiftConformance;
@@ -786,6 +787,8 @@ class TagInfo : public CommonTypeInfo {
SwiftRetainOp = RHS.SwiftRetainOp;
if (!SwiftReleaseOp)
SwiftReleaseOp = RHS.SwiftReleaseOp;
+ if (!SwiftDefaultOwnership)
+ SwiftDefaultOwnership = RHS.SwiftDefaultOwnership;
if (!SwiftConformance)
SwiftConformance = RHS.SwiftConformance;
@@ -815,6 +818,7 @@ inline bool operator==(const TagInfo &LHS, const TagInfo &RHS) {
LHS.SwiftImportAs == RHS.SwiftImportAs &&
LHS.SwiftRetainOp == RHS.SwiftRetainOp &&
LHS.SwiftReleaseOp == RHS.SwiftReleaseOp &&
+ LHS.SwiftDefaultOwnership == RHS.SwiftDefaultOwnership &&
LHS.SwiftConformance == RHS.SwiftConformance &&
LHS.isFlagEnum() == RHS.isFlagEnum() &&
LHS.isSwiftCopyable() == RHS.isSwiftCopyable() &&
diff --git a/clang/lib/APINotes/APINotesFormat.h b/clang/lib/APINotes/APINotesFormat.h
index 939235179c363..bb0c276e74964 100644
--- a/clang/lib/APINotes/APINotesFormat.h
+++ b/clang/lib/APINotes/APINotesFormat.h
@@ -24,7 +24,7 @@ const uint16_t VERSION_MAJOR = 0;
/// API notes file minor version number.
///
/// When the format changes IN ANY WAY, this number should be incremented.
-const uint16_t VERSION_MINOR = 34; // SwiftReturnOwnership
+const uint16_t VERSION_MINOR = 35; // SwiftDefaultOwnership
const uint8_t kSwiftConforms = 1;
const uint8_t kSwiftDoesNotConform = 2;
diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp
index 646eabd2a5ecd..2ba30ca427ae4 100644
--- a/clang/lib/APINotes/APINotesReader.cpp
+++ b/clang/lib/APINotes/APINotesReader.cpp
@@ -624,6 +624,13 @@ class TagTableInfo
ReleaseOpLength - 1);
Data += ReleaseOpLength - 1;
}
+ unsigned DefaultOwnershipLength =
+ endian::readNext<uint16_t, llvm::endianness::little>(Data);
+ if (DefaultOwnershipLength > 0) {
+ Info.SwiftDefaultOwnership = std::string(
+ reinterpret_cast<const char *>(Data), DefaultOwnershipLength - 1);
+ Data += DefaultOwnershipLength - 1;
+ }
if (unsigned ConformanceLength =
endian::readNext<uint16_t, llvm::endianness::little>(Data)) {
Info.SwiftConformance = std::string(reinterpret_cast<const char *>(Data),
diff --git a/clang/lib/APINotes/APINotesWriter.cpp b/clang/lib/APINotes/APINotesWriter.cpp
index 1aae07bbdd30e..7578bc37b5c68 100644
--- a/clang/lib/APINotes/APINotesWriter.cpp
+++ b/clang/lib/APINotes/APINotesWriter.cpp
@@ -1274,6 +1274,7 @@ class TagTableInfo : public CommonTypeTableInfo<TagTableInfo, TagInfo> {
return 2 + (TI.SwiftImportAs ? TI.SwiftImportAs->size() : 0) +
2 + (TI.SwiftRetainOp ? TI.SwiftRetainOp->size() : 0) +
2 + (TI.SwiftReleaseOp ? TI.SwiftReleaseOp->size() : 0) +
+ 2 + (TI.SwiftDefaultOwnership ? TI.SwiftDefaultOwnership->size() : 0) +
2 + (TI.SwiftConformance ? TI.SwiftConformance->size() : 0) +
3 + getCommonTypeInfoSize(TI);
// clang-format on
@@ -1322,6 +1323,12 @@ class TagTableInfo : public CommonTypeTableInfo<TagTableInfo, TagInfo> {
} else {
writer.write<uint16_t>(0);
}
+ if (auto DefaultOwnership = TI.SwiftDefaultOwnership) {
+ writer.write<uint16_t>(DefaultOwnership->size() + 1);
+ OS.write(DefaultOwnership->c_str(), DefaultOwnership->size());
+ } else {
+ writer.write<uint16_t>(0);
+ }
if (auto Conformance = TI.SwiftConformance) {
writer.write<uint16_t>(Conformance->size() + 1);
OS.write(Conformance->c_str(), Conformance->size());
diff --git a/clang/lib/APINotes/APINotesYAMLCompiler.cpp b/clang/lib/APINotes/APINotesYAMLCompiler.cpp
index 414a59a4f12d0..803410c54c646 100644
--- a/clang/lib/APINotes/APINotesYAMLCompiler.cpp
+++ b/clang/lib/APINotes/APINotesYAMLCompiler.cpp
@@ -460,6 +460,7 @@ struct Tag {
std::optional<std::string> SwiftImportAs;
std::optional<std::string> SwiftRetainOp;
std::optional<std::string> SwiftReleaseOp;
+ std::optional<std::string> SwiftDefaultOwnership;
std::optional<std::string> SwiftConformance;
std::optional<EnumExtensibilityKind> EnumExtensibility;
std::optional<bool> FlagEnum;
@@ -500,6 +501,7 @@ template <> struct MappingTraits<Tag> {
IO.mapOptional("SwiftImportAs", T.SwiftImportAs);
IO.mapOptional("SwiftReleaseOp", T.SwiftReleaseOp);
IO.mapOptional("SwiftRetainOp", T.SwiftRetainOp);
+ IO.mapOptional("SwiftDefaultOwnership", T.SwiftDefaultOwnership);
IO.mapOptional("SwiftConformsTo", T.SwiftConformance);
IO.mapOptional("EnumExtensibility", T.EnumExtensibility);
IO.mapOptional("FlagEnum", T.FlagEnum);
@@ -990,6 +992,8 @@ class YAMLConverter {
TI.SwiftReleaseOp = T.SwiftReleaseOp;
if (T.SwiftConformance)
TI.SwiftConformance = T.SwiftConformance;
+ if (T.SwiftDefaultOwnership)
+ TI.SwiftDefaultOwnership = T.SwiftDefaultOwnership;
if (T.SwiftCopyable)
TI.setSwiftCopyable(T.SwiftCopyable);
diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp
index b354bb7b06435..def909fc2478d 100644
--- a/clang/lib/Sema/SemaAPINotes.cpp
+++ b/clang/lib/Sema/SemaAPINotes.cpp
@@ -643,6 +643,9 @@ static void ProcessAPINotes(Sema &S, TagDecl *D, const api_notes::TagInfo &Info,
if (auto ReleaseOp = Info.SwiftReleaseOp)
D->addAttr(
SwiftAttrAttr::Create(S.Context, "release:" + ReleaseOp.value()));
+ if (auto DefaultOwnership = Info.SwiftDefaultOwnership)
+ D->addAttr(SwiftAttrAttr::Create(
+ S.Context, "returned_as_" + DefaultOwnership.value() + "_by_default"));
if (auto ConformsTo = Info.SwiftConformance)
D->addAttr(
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
index 88e0da1382d6c..66fc46e50ba0d 100644
--- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
@@ -14,6 +14,11 @@ Tags:
SwiftReleaseOp: RCRelease
SwiftRetainOp: RCRetain
SwiftConformsTo: MySwiftModule.MySwiftRefCountedProtocol
+- Name: RefCountedTypeWithDefaultConvention
+ SwiftImportAs: reference
+ SwiftReleaseOp: release
+ SwiftRetainOp: retain
+ SwiftDefaultOwnership: unretained
- Name: NonCopyableType
SwiftCopyable: false
SwiftConformsTo: MySwiftModule.MySwiftNonCopyableProtocol
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
index b6900fee8a979..20b8f04b4a914 100644
--- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
@@ -19,3 +19,7 @@ struct CopyableType { int value; };
struct NonEscapableType { int value; };
struct EscapableType { int value; };
+
+struct RefCountedTypeWithDefaultConvention {};
+inline void retain(RefCountedType *x) {}
+inline void release(RefCountedType *x) {}
diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp
index 3981ef1ed419a..929f924f2afee 100644
--- a/clang/test/APINotes/swift-import-as.cpp
+++ b/clang/test/APINotes/swift-import-as.cpp
@@ -2,6 +2,7 @@
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers %s -x c++
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter ImmortalRefType | FileCheck -check-prefix=CHECK-IMMORTAL %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedType | FileCheck -check-prefix=CHECK-REF-COUNTED %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedTypeWithDefaultConvention | FileCheck -check-prefix=CHECK-REF-COUNTED-DEFAULT %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter NonCopyableType | FileCheck -check-prefix=CHECK-NON-COPYABLE %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter CopyableType | FileCheck -check-prefix=CHECK-COPYABLE %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter NonEscapableType | FileCheck -check-prefix=CHECK-NON-ESCAPABLE %s
@@ -26,6 +27,13 @@
// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:RCRelease"
// CHECK-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "conforms_to:MySwiftModule.MySwiftRefCountedProtocol"
+// CHECK-REF-COUNTED-DEFAULT: Dumping RefCountedTypeWithDefaultConvention:
+// CHECK-REF-COUNTED-DEFAULT-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct RefCountedTypeWithDefaultConvention
+// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference"
+// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "retain:retain"
+// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:release"
+// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "returned_as_unretained_by_default"
+
// CHECK-NON-COPYABLE: Dumping NonCopyableType:
// CHECK-NON-COPYABLE-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct NonCopyableType
// CHECK-NON-COPYABLE: SwiftAttrAttr {{.+}} <<invalid sloc>> "conforms_to:MySwiftModule.MySwiftNonCopyableProtocol"
>From f4e7ba02cc7fd35f3e5ad82cf98c3220af7cd068 Mon Sep 17 00:00:00 2001
From: Erich Keane <ekeane at nvidia.com>
Date: Wed, 7 May 2025 13:48:17 -0700
Subject: [PATCH 051/115] [OpenACC][CIR] Implement 'worker'/'vector' lowering
(#138765)
This patch implements 'worker' and 'vector' lowering for the loop
construct. These are fairly simple clauses, except that they also have a
no-argument form, which requires a touch more work. Otherwise, they are
just like a handful of other clauses where we keep the device_type array
and operands in sync.
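For context, the two surface forms the lowering has to handle look roughly like
this (a usage sketch only; names and values are illustrative and not taken from
the test below):

```cpp
// Sketch of the two clause forms the emitter handles: without an argument the
// clause only records the active device_types; with an argument it also
// records the operand expression.
void sketch(float *x, float *y, int n) {
#pragma acc kernels
  {
#pragma acc loop worker // no-argument form
    for (int i = 0; i < n; ++i)
      y[i] += x[i];

#pragma acc loop vector(128) // argument form
    for (int i = 0; i < n; ++i)
      y[i] += x[i];
  }
}
```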
---
clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h | 32 +++++
clang/test/CIR/CodeGenOpenACC/loop.cpp | 130 ++++++++++++++++++
.../mlir/Dialect/OpenACC/OpenACCOps.td | 15 ++
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 28 ++++
4 files changed, 205 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
index fa4ce5efc39ad..ef4f64a167742 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h
@@ -392,6 +392,38 @@ class OpenACCClauseCIREmitter final
return clauseNotImplemented(clause);
}
}
+
+ void VisitWorkerClause(const OpenACCWorkerClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ if (clause.hasIntExpr())
+ operation.addWorkerNumOperand(builder.getContext(),
+ createIntExpr(clause.getIntExpr()),
+ lastDeviceTypeValues);
+ else
+ operation.addEmptyWorker(builder.getContext(), lastDeviceTypeValues);
+
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
+
+ void VisitVectorClause(const OpenACCVectorClause &clause) {
+ if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
+ if (clause.hasIntExpr())
+ operation.addVectorOperand(builder.getContext(),
+ createIntExpr(clause.getIntExpr()),
+ lastDeviceTypeValues);
+ else
+ operation.addEmptyVector(builder.getContext(), lastDeviceTypeValues);
+
+ } else {
+ // TODO: When we've implemented this for everything, switch this to an
+ // unreachable. Combined constructs remain.
+ return clauseNotImplemented(clause);
+ }
+ }
};
template <typename OpTy>
diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp
index b255a01adda0e..d636d1b37d969 100644
--- a/clang/test/CIR/CodeGenOpenACC/loop.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp
@@ -193,4 +193,134 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
// CHECK: acc.yield
// CHECK-NEXT: } loc
+
+#pragma acc kernels
+ {
+
+#pragma acc loop worker
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK: acc.loop worker {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker device_type(nvidia, radeon) worker
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker(N) device_type(nvidia, radeon) worker
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker device_type(nvidia, radeon) worker(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop device_type(nvidia, radeon) worker(num:N + 1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
+
+#pragma acc loop vector
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK: acc.loop vector {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop vector(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop vector device_type(nvidia, radeon) vector
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop vector(N) device_type(nvidia, radeon) vector
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop device_type(nvidia, radeon) vector(length:N + 1)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
+ // CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker vector device_type(nvidia) worker vector
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+
+#pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
+ for(unsigned I = 0; I < N; ++I);
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
+ // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
+ // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
+ // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
+ }
}
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 41b01a14a6498..ca564037fad19 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2216,6 +2216,21 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
// values should be integral constants, with the '*' represented as a '-1'.
void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
mlir::ValueRange);
+
+ // Add a value to the 'vector' list with a current list of device_types.
+ void addVectorOperand(MLIRContext *, mlir::Value,
+ llvm::ArrayRef<DeviceType>);
+ // Add an empty value to the 'vector' list with a current list of
+ // device_types. This is for the case where there is no expression specified
+ // in a 'vector'.
+ void addEmptyVector(MLIRContext *, llvm::ArrayRef<DeviceType>);
+ // Add a value to the 'worker' list with a current list of device_types.
+ void addWorkerNumOperand(MLIRContext *, mlir::Value,
+ llvm::ArrayRef<DeviceType>);
+ // Add an empty value to the 'worker' list with a current list of
+ // device_types. This is for the case where there is no expression specified
+ // in a 'worker'.
+ void addEmptyWorker(MLIRContext *, llvm::ArrayRef<DeviceType>);
}];
let hasCustomAssemblyFormat = 1;
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index f26b3a5143c0b..9f4645a4a7ca8 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2720,6 +2720,34 @@ void acc::LoopOp::setTileForDeviceTypes(
setTileOperandsSegments(segments);
}
+void acc::LoopOp::addVectorOperand(
+ MLIRContext *context, mlir::Value newValue,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setVectorOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getVectorOperandsDeviceTypeAttr(), effectiveDeviceTypes,
+ newValue, getVectorOperandsMutable()));
+}
+
+void acc::LoopOp::addEmptyVector(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setVectorAttr(addDeviceTypeAffectedOperandHelper(context, getVectorAttr(),
+ effectiveDeviceTypes));
+}
+
+void acc::LoopOp::addWorkerNumOperand(
+ MLIRContext *context, mlir::Value newValue,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setWorkerNumOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
+ context, getWorkerNumOperandsDeviceTypeAttr(), effectiveDeviceTypes,
+ newValue, getWorkerNumOperandsMutable()));
+}
+
+void acc::LoopOp::addEmptyWorker(
+ MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setWorkerAttr(addDeviceTypeAffectedOperandHelper(context, getWorkerAttr(),
+ effectiveDeviceTypes));
+}
+
//===----------------------------------------------------------------------===//
// DataOp
//===----------------------------------------------------------------------===//
>From 8602a655a8150753542b0237fcca16d9ee1cd981 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 7 May 2025 21:55:44 +0100
Subject: [PATCH 052/115] [DenseMap] Introduce keys, values iterators (#138848)
---
llvm/include/llvm/ADT/DenseMap.h | 19 ++++++++++++
llvm/unittests/ADT/DenseMapTest.cpp | 46 +++++++++++++++++++++++++++++
2 files changed, 65 insertions(+)
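As a quick illustration (not part of the patch), here is a minimal usage sketch of the keys()/values() ranges introduced below; the dumpKeysAndValues helper is hypothetical:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

// Iterate keys and mapped values separately, without touching the buckets.
static void dumpKeysAndValues(const llvm::DenseMap<int, llvm::StringRef> &M) {
  for (int K : M.keys())
    llvm::outs() << "key: " << K << "\n";
  for (llvm::StringRef V : M.values())
    llvm::outs() << "value: " << V << "\n";
}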
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index bb99a41646b08..3175b3ece467c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ADL.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/EpochTracker.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
@@ -96,6 +97,24 @@ class DenseMapBase : public DebugEpochBase {
return makeConstIterator(getBucketsEnd(), getBucketsEnd(), *this, true);
}
+ // Return an iterator to iterate over keys in the map.
+ inline auto keys() {
+ return map_range(*this, [](const BucketT &P) { return P.getFirst(); });
+ }
+
+ // Return an iterator to iterate over values in the map.
+ inline auto values() {
+ return map_range(*this, [](const BucketT &P) { return P.getSecond(); });
+ }
+
+ inline auto keys() const {
+ return map_range(*this, [](const BucketT &P) { return P.getFirst(); });
+ }
+
+ inline auto values() const {
+ return map_range(*this, [](const BucketT &P) { return P.getSecond(); });
+ }
+
[[nodiscard]] bool empty() const { return getNumEntries() == 0; }
unsigned size() const { return getNumEntries(); }
diff --git a/llvm/unittests/ADT/DenseMapTest.cpp b/llvm/unittests/ADT/DenseMapTest.cpp
index a4c045585fc28..b9d519a23c9be 100644
--- a/llvm/unittests/ADT/DenseMapTest.cpp
+++ b/llvm/unittests/ADT/DenseMapTest.cpp
@@ -10,6 +10,7 @@
#include "CountCopyAndMove.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseMapInfoVariant.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -359,6 +360,51 @@ TYPED_TEST(DenseMapTest, ConstIteratorTest) {
EXPECT_TRUE(cit == cit2);
}
+TYPED_TEST(DenseMapTest, KeysValuesIterator) {
+ SmallSet<typename TypeParam::key_type, 10> Keys;
+ SmallSet<typename TypeParam::mapped_type, 10> Values;
+ for (int I = 0; I < 10; ++I) {
+ auto K = this->getKey(I);
+ auto V = this->getValue(I);
+ Keys.insert(K);
+ Values.insert(V);
+ this->Map[K] = V;
+ }
+
+ SmallSet<typename TypeParam::key_type, 10> ActualKeys;
+ SmallSet<typename TypeParam::mapped_type, 10> ActualValues;
+ for (auto K : this->Map.keys())
+ ActualKeys.insert(K);
+ for (auto V : this->Map.values())
+ ActualValues.insert(V);
+
+ EXPECT_EQ(Keys, ActualKeys);
+ EXPECT_EQ(Values, ActualValues);
+}
+
+TYPED_TEST(DenseMapTest, ConstKeysValuesIterator) {
+ SmallSet<typename TypeParam::key_type, 10> Keys;
+ SmallSet<typename TypeParam::mapped_type, 10> Values;
+ for (int I = 0; I < 10; ++I) {
+ auto K = this->getKey(I);
+ auto V = this->getValue(I);
+ Keys.insert(K);
+ Values.insert(V);
+ this->Map[K] = V;
+ }
+
+ const TypeParam &ConstMap = this->Map;
+ SmallSet<typename TypeParam::key_type, 10> ActualKeys;
+ SmallSet<typename TypeParam::mapped_type, 10> ActualValues;
+ for (auto K : ConstMap.keys())
+ ActualKeys.insert(K);
+ for (auto V : ConstMap.values())
+ ActualValues.insert(V);
+
+ EXPECT_EQ(Keys, ActualKeys);
+ EXPECT_EQ(Values, ActualValues);
+}
+
// Test initializer list construction.
TEST(DenseMapCustomTest, InitializerList) {
DenseMap<int, int> M({{0, 0}, {0, 1}, {1, 2}});
>From b8461acc5eb41ced70cc5c7f5a324cfd8bf76403 Mon Sep 17 00:00:00 2001
From: Jan Patrick Lehr <JanPatrick.Lehr at amd.com>
Date: Wed, 7 May 2025 23:01:14 +0200
Subject: [PATCH 053/115] [Flang] Fix driver test after #125643 (#138959)
---
flang/test/Driver/mcmodel.f90 | 2 --
1 file changed, 2 deletions(-)
diff --git a/flang/test/Driver/mcmodel.f90 b/flang/test/Driver/mcmodel.f90
index 12d90ece2f24f..8a03b17bfbcba 100644
--- a/flang/test/Driver/mcmodel.f90
+++ b/flang/test/Driver/mcmodel.f90
@@ -1,5 +1,4 @@
! RUN: not %flang -### -c --target=i686 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=ERR-MEDIUM %s
-! RUN: %flang --target=x86_64 -### -c -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=TINY %s
! RUN: %flang --target=x86_64 -### -c -mcmodel=small %s 2>&1 | FileCheck --check-prefix=SMALL %s
! RUN: %flang --target=x86_64 -### -S -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=KERNEL %s
! RUN: %flang --target=x86_64 -### -c -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s
@@ -41,4 +40,3 @@
! AARCH64-PIC-LARGE: error: invalid argument '-mcmodel=large' only allowed with '-fno-pic'
! ERR-AARCH64_32: error: unsupported argument 'small' to option '-mcmodel=' for target 'aarch64_32-unknown-linux'
-
>From 1698beb5420f6e6f7eed5d9914ab6a10ff5f4b1f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 7 May 2025 14:02:40 -0700
Subject: [PATCH 054/115] [clang][modules][deps] Optimize in-process
timestamping of PCMs (#137363)
In the past, timestamps used for
`-fmodules-validate-once-per-build-session` were found to be a source of
contention in the dependency scanner
([D149802](https://reviews.llvm.org/D149802),
https://github.com/llvm/llvm-project/pull/112452). This PR is yet
another attempt to optimize these. We now make use of the new
`ModuleCache` interface to implement the in-process version in terms of
atomic `std::time_t` variables rather than the mtime attribute on
`.timestamp` files.
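A rough standalone sketch of the idea (simplified, with hypothetical names; the real implementation is in InProcessModuleCache below): per-file validation timestamps live in in-memory atomics guarded by a single map mutex, so no .timestamp files need to be touched during scanning.

#include <atomic>
#include <chrono>
#include <ctime>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct TimestampEntry {
  std::atomic<std::time_t> Timestamp{0};
};

class InMemoryTimestamps {
  std::mutex Mutex;
  std::map<std::string, std::unique_ptr<TimestampEntry>> Map;

  TimestampEntry &get(const std::string &File) {
    std::lock_guard<std::mutex> Lock(Mutex);
    auto &E = Map[File];
    if (!E)
      E = std::make_unique<TimestampEntry>();
    return *E;
  }

public:
  // Last time the module file's inputs were validated (0 if never).
  std::time_t read(const std::string &File) {
    return get(File).Timestamp.load();
  }
  // Record "validated now"; replaces rewriting a .timestamp file on disk.
  void update(const std::string &File) {
    get(File).Timestamp.store(
        std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()));
  }
};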
---
.../include/clang/Serialization/ModuleCache.h | 16 ++++++-
.../DependencyScanningService.h | 15 ++++--
.../DependencyScanning/InProcessModuleCache.h | 11 +++--
clang/lib/Serialization/ASTCommon.cpp | 12 -----
clang/lib/Serialization/ASTCommon.h | 2 -
clang/lib/Serialization/ASTReader.cpp | 3 +-
clang/lib/Serialization/ASTWriter.cpp | 2 +-
clang/lib/Serialization/ModuleCache.cpp | 23 +++++++++
clang/lib/Serialization/ModuleManager.cpp | 12 ++---
.../DependencyScanningService.cpp | 6 ++-
.../DependencyScanningWorker.cpp | 6 ++-
.../InProcessModuleCache.cpp | 48 ++++++++++++++-----
12 files changed, 109 insertions(+), 47 deletions(-)
diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h
index a7ba26bc4daae..3117d954a09cc 100644
--- a/clang/include/clang/Serialization/ModuleCache.h
+++ b/clang/include/clang/Serialization/ModuleCache.h
@@ -12,6 +12,8 @@
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include <ctime>
+
namespace llvm {
class AdvisoryLock;
} // namespace llvm
@@ -31,11 +33,23 @@ class ModuleCache : public RefCountedBase<ModuleCache> {
virtual std::unique_ptr<llvm::AdvisoryLock>
getLock(StringRef ModuleFilename) = 0;
+ // TODO: Abstract away timestamps with isUpToDate() and markUpToDate().
+ // TODO: Consider exposing a "validation lock" API to prevent multiple clients
+ // concurrently noticing an out-of-date module file and validating its inputs.
+
+ /// Returns the timestamp denoting the last time inputs of the module file
+ /// were validated.
+ virtual std::time_t getModuleTimestamp(StringRef ModuleFilename) = 0;
+
+ /// Updates the timestamp denoting the last time inputs of the module file
+ /// were validated.
+ virtual void updateModuleTimestamp(StringRef ModuleFilename) = 0;
+
/// Returns this process's view of the module cache.
virtual InMemoryModuleCache &getInMemoryModuleCache() = 0;
virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0;
- // TODO: Virtualize writing/reading PCM files, timestamping, pruning, etc.
+ // TODO: Virtualize writing/reading PCM files, pruning, etc.
virtual ~ModuleCache() = default;
};
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
index 5e8b37e791383..4e97c7bc9f36e 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
@@ -12,6 +12,7 @@
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/Support/Chrono.h"
namespace clang {
namespace tooling {
@@ -84,7 +85,9 @@ class DependencyScanningService {
DependencyScanningService(
ScanningMode Mode, ScanningOutputFormat Format,
ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default,
- bool EagerLoadModules = false, bool TraceVFS = false);
+ bool EagerLoadModules = false, bool TraceVFS = false,
+ std::time_t BuildSessionTimestamp =
+ llvm::sys::toTimeT(std::chrono::system_clock::now()));
ScanningMode getMode() const { return Mode; }
@@ -100,7 +103,9 @@ class DependencyScanningService {
return SharedCache;
}
- ModuleCacheMutexes &getModuleCacheMutexes() { return ModCacheMutexes; }
+ ModuleCacheEntries &getModuleCacheEntries() { return ModCacheEntries; }
+
+ std::time_t getBuildSessionTimestamp() const { return BuildSessionTimestamp; }
private:
const ScanningMode Mode;
@@ -113,8 +118,10 @@ class DependencyScanningService {
const bool TraceVFS;
/// The global file system cache.
DependencyScanningFilesystemSharedCache SharedCache;
- /// The global module cache mutexes.
- ModuleCacheMutexes ModCacheMutexes;
+ /// The global module cache entries.
+ ModuleCacheEntries ModCacheEntries;
+ /// The build session timestamp.
+ std::time_t BuildSessionTimestamp;
};
} // end namespace dependencies
diff --git a/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h b/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h
index ba0454380b665..213e60b39c199 100644
--- a/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h
+++ b/clang/include/clang/Tooling/DependencyScanning/InProcessModuleCache.h
@@ -18,13 +18,18 @@
namespace clang {
namespace tooling {
namespace dependencies {
-struct ModuleCacheMutexes {
+struct ModuleCacheEntry {
+ std::shared_mutex CompilationMutex;
+ std::atomic<std::time_t> Timestamp = 0;
+};
+
+struct ModuleCacheEntries {
std::mutex Mutex;
- llvm::StringMap<std::unique_ptr<std::shared_mutex>> Map;
+ llvm::StringMap<std::unique_ptr<ModuleCacheEntry>> Map;
};
IntrusiveRefCntPtr<ModuleCache>
-makeInProcessModuleCache(ModuleCacheMutexes &Mutexes);
+makeInProcessModuleCache(ModuleCacheEntries &Entries);
} // namespace dependencies
} // namespace tooling
} // namespace clang
diff --git a/clang/lib/Serialization/ASTCommon.cpp b/clang/lib/Serialization/ASTCommon.cpp
index 320ee0e65dbea..ad277f19711ff 100644
--- a/clang/lib/Serialization/ASTCommon.cpp
+++ b/clang/lib/Serialization/ASTCommon.cpp
@@ -510,15 +510,3 @@ bool serialization::needsAnonymousDeclarationNumber(const NamedDecl *D) {
return false;
return isa<TagDecl, FieldDecl>(D);
}
-
-void serialization::updateModuleTimestamp(StringRef ModuleFilename) {
- // Overwrite the timestamp file contents so that file's mtime changes.
- std::error_code EC;
- llvm::raw_fd_ostream OS(ModuleFile::getTimestampFilename(ModuleFilename), EC,
- llvm::sys::fs::OF_TextWithCRLF);
- if (EC)
- return;
- OS << "Timestamp file\n";
- OS.close();
- OS.clear_error(); // Avoid triggering a fatal error.
-}
diff --git a/clang/lib/Serialization/ASTCommon.h b/clang/lib/Serialization/ASTCommon.h
index ed6b8d00115ea..371db4bace013 100644
--- a/clang/lib/Serialization/ASTCommon.h
+++ b/clang/lib/Serialization/ASTCommon.h
@@ -100,8 +100,6 @@ inline bool isPartOfPerModuleInitializer(const Decl *D) {
return false;
}
-void updateModuleTimestamp(StringRef ModuleFilename);
-
} // namespace serialization
} // namespace clang
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index e47bac0261356..a17d6229ee3a1 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -4952,7 +4952,8 @@ ASTReader::ASTReadResult ASTReader::ReadAST(StringRef FileName, ModuleKind Type,
ImportedModule &M = Loaded[I];
if (M.Mod->Kind == MK_ImplicitModule &&
M.Mod->InputFilesValidationTimestamp < HSOpts.BuildSessionTimestamp)
- updateModuleTimestamp(M.Mod->FileName);
+ getModuleManager().getModuleCache().updateModuleTimestamp(
+ M.Mod->FileName);
}
}
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 8c5adc3959398..cccf53de25882 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -5394,7 +5394,7 @@ ASTWriter::WriteAST(llvm::PointerUnion<Sema *, Preprocessor *> Subject,
if (WritingModule && PPRef.getHeaderSearchInfo()
.getHeaderSearchOpts()
.ModulesValidateOncePerBuildSession)
- updateModuleTimestamp(OutputFile);
+ ModCache.updateModuleTimestamp(OutputFile);
if (ShouldCacheASTInMemory) {
// Construct MemoryBuffer and update buffer manager.
diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp
index 955e5f322bcc3..4ae49c4ec9a05 100644
--- a/clang/lib/Serialization/ModuleCache.cpp
+++ b/clang/lib/Serialization/ModuleCache.cpp
@@ -9,6 +9,7 @@
#include "clang/Serialization/ModuleCache.h"
#include "clang/Serialization/InMemoryModuleCache.h"
+#include "clang/Serialization/ModuleFile.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LockFileManager.h"
#include "llvm/Support/Path.h"
@@ -32,6 +33,28 @@ class CrossProcessModuleCache : public ModuleCache {
return std::make_unique<llvm::LockFileManager>(ModuleFilename);
}
+ std::time_t getModuleTimestamp(StringRef ModuleFilename) override {
+ std::string TimestampFilename =
+ serialization::ModuleFile::getTimestampFilename(ModuleFilename);
+ llvm::sys::fs::file_status Status;
+ if (llvm::sys::fs::status(ModuleFilename, Status) != std::error_code{})
+ return 0;
+ return llvm::sys::toTimeT(Status.getLastModificationTime());
+ }
+
+ void updateModuleTimestamp(StringRef ModuleFilename) override {
+ // Overwrite the timestamp file contents so that file's mtime changes.
+ std::error_code EC;
+ llvm::raw_fd_ostream OS(
+ serialization::ModuleFile::getTimestampFilename(ModuleFilename), EC,
+ llvm::sys::fs::OF_TextWithCRLF);
+ if (EC)
+ return;
+ OS << "Timestamp file\n";
+ OS.close();
+ OS.clear_error(); // Avoid triggering a fatal error.
+ }
+
InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp
index d466ea06301a6..e3d7ff4fd82a7 100644
--- a/clang/lib/Serialization/ModuleManager.cpp
+++ b/clang/lib/Serialization/ModuleManager.cpp
@@ -174,15 +174,9 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
NewModule->ImportLoc = ImportLoc;
NewModule->InputFilesValidationTimestamp = 0;
- if (NewModule->Kind == MK_ImplicitModule) {
- std::string TimestampFilename =
- ModuleFile::getTimestampFilename(NewModule->FileName);
- llvm::vfs::Status Status;
- // A cached stat value would be fine as well.
- if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
- NewModule->InputFilesValidationTimestamp =
- llvm::sys::toTimeT(Status.getLastModificationTime());
- }
+ if (NewModule->Kind == MK_ImplicitModule)
+ NewModule->InputFilesValidationTimestamp =
+ ModCache->getModuleTimestamp(NewModule->FileName);
// Load the contents of the module
if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
index 96fe40c079c65..7f40c99f07287 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
@@ -14,6 +14,8 @@ using namespace dependencies;
DependencyScanningService::DependencyScanningService(
ScanningMode Mode, ScanningOutputFormat Format,
- ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool TraceVFS)
+ ScanningOptimizations OptimizeArgs, bool EagerLoadModules, bool TraceVFS,
+ std::time_t BuildSessionTimestamp)
: Mode(Mode), Format(Format), OptimizeArgs(OptimizeArgs),
- EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS) {}
+ EagerLoadModules(EagerLoadModules), TraceVFS(TraceVFS),
+ BuildSessionTimestamp(BuildSessionTimestamp) {}
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 2443918526263..5c9cf3e416ca5 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -411,7 +411,7 @@ class DependencyScanningAction : public tooling::ToolAction {
Scanned = true;
// Create a compiler instance to handle the actual work.
- auto ModCache = makeInProcessModuleCache(Service.getModuleCacheMutexes());
+ auto ModCache = makeInProcessModuleCache(Service.getModuleCacheEntries());
ScanInstanceStorage.emplace(std::move(Invocation),
std::move(PCHContainerOps), ModCache.get());
CompilerInstance &ScanInstance = *ScanInstanceStorage;
@@ -428,6 +428,10 @@ class DependencyScanningAction : public tooling::ToolAction {
ScanInstance.getPreprocessorOpts().AllowPCHWithDifferentModulesCachePath =
true;
+ if (ScanInstance.getHeaderSearchOpts().ModulesValidateOncePerBuildSession)
+ ScanInstance.getHeaderSearchOpts().BuildSessionTimestamp =
+ Service.getBuildSessionTimestamp();
+
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
// This will prevent us compiling individual modules asynchronously since
diff --git a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
index 71ce4d098932b..eb05821e2bc02 100644
--- a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
+++ b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
@@ -10,6 +10,7 @@
#include "clang/Serialization/InMemoryModuleCache.h"
#include "llvm/Support/AdvisoryLock.h"
+#include "llvm/Support/Chrono.h"
#include <mutex>
@@ -50,7 +51,7 @@ class ReaderWriterLock : public llvm::AdvisoryLock {
};
class InProcessModuleCache : public ModuleCache {
- ModuleCacheMutexes &Mutexes;
+ ModuleCacheEntries &Entries;
// TODO: If we changed the InMemoryModuleCache API and relied on strict
// context hash, we could probably create more efficient thread-safe
@@ -59,19 +60,44 @@ class InProcessModuleCache : public ModuleCache {
InMemoryModuleCache InMemory;
public:
- InProcessModuleCache(ModuleCacheMutexes &Mutexes) : Mutexes(Mutexes) {}
+ InProcessModuleCache(ModuleCacheEntries &Entries) : Entries(Entries) {}
void prepareForGetLock(StringRef Filename) override {}
std::unique_ptr<llvm::AdvisoryLock> getLock(StringRef Filename) override {
- auto &Mtx = [&]() -> std::shared_mutex & {
- std::lock_guard<std::mutex> Lock(Mutexes.Mutex);
- auto &Mutex = Mutexes.Map[Filename];
- if (!Mutex)
- Mutex = std::make_unique<std::shared_mutex>();
- return *Mutex;
+ auto &CompilationMutex = [&]() -> std::shared_mutex & {
+ std::lock_guard Lock(Entries.Mutex);
+ auto &Entry = Entries.Map[Filename];
+ if (!Entry)
+ Entry = std::make_unique<ModuleCacheEntry>();
+ return Entry->CompilationMutex;
}();
- return std::make_unique<ReaderWriterLock>(Mtx);
+ return std::make_unique<ReaderWriterLock>(CompilationMutex);
+ }
+
+ std::time_t getModuleTimestamp(StringRef Filename) override {
+ auto &Timestamp = [&]() -> std::atomic<std::time_t> & {
+ std::lock_guard Lock(Entries.Mutex);
+ auto &Entry = Entries.Map[Filename];
+ if (!Entry)
+ Entry = std::make_unique<ModuleCacheEntry>();
+ return Entry->Timestamp;
+ }();
+
+ return Timestamp.load();
+ }
+
+ void updateModuleTimestamp(StringRef Filename) override {
+ // Note: This essentially replaces FS contention with mutex contention.
+ auto &Timestamp = [&]() -> std::atomic<std::time_t> & {
+ std::lock_guard Lock(Entries.Mutex);
+ auto &Entry = Entries.Map[Filename];
+ if (!Entry)
+ Entry = std::make_unique<ModuleCacheEntry>();
+ return Entry->Timestamp;
+ }();
+
+ Timestamp.store(llvm::sys::toTimeT(std::chrono::system_clock::now()));
}
InMemoryModuleCache &getInMemoryModuleCache() override { return InMemory; }
@@ -82,6 +108,6 @@ class InProcessModuleCache : public ModuleCache {
} // namespace
IntrusiveRefCntPtr<ModuleCache>
-dependencies::makeInProcessModuleCache(ModuleCacheMutexes &Mutexes) {
- return llvm::makeIntrusiveRefCnt<InProcessModuleCache>(Mutexes);
+dependencies::makeInProcessModuleCache(ModuleCacheEntries &Entries) {
+ return llvm::makeIntrusiveRefCnt<InProcessModuleCache>(Entries);
}
>From 6babd63a4bbc094bee4ef8e75f95dccd32325c15 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Thu, 8 May 2025 00:03:23 +0300
Subject: [PATCH 055/115] [XCore] TableGen-erate SDNode descriptions (#138869)
This consolidates node definitions into one place and enables automatic
node verification.
Part of #119709.
---
llvm/lib/Target/XCore/CMakeLists.txt | 1 +
llvm/lib/Target/XCore/XCoreISelLowering.cpp | 27 --------
llvm/lib/Target/XCore/XCoreISelLowering.h | 63 -------------------
.../Target/XCore/XCoreSelectionDAGInfo.cpp | 8 +++
llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h | 9 ++-
5 files changed, 16 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/XCore/CMakeLists.txt b/llvm/lib/Target/XCore/CMakeLists.txt
index 447f5c5452885..f411c658b43b0 100644
--- a/llvm/lib/Target/XCore/CMakeLists.txt
+++ b/llvm/lib/Target/XCore/CMakeLists.txt
@@ -8,6 +8,7 @@ tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM XCoreGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM XCoreGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM XCoreGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(XCoreCommonTableGen)
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index ac199230b2c07..1c6e294597c34 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -40,33 +40,6 @@ using namespace llvm;
#define DEBUG_TYPE "xcore-lower"
-const char *XCoreTargetLowering::
-getTargetNodeName(unsigned Opcode) const
-{
- switch ((XCoreISD::NodeType)Opcode)
- {
- case XCoreISD::FIRST_NUMBER : break;
- case XCoreISD::BL : return "XCoreISD::BL";
- case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
- case XCoreISD::DPRelativeWrapper : return "XCoreISD::DPRelativeWrapper";
- case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper";
- case XCoreISD::LDWSP : return "XCoreISD::LDWSP";
- case XCoreISD::STWSP : return "XCoreISD::STWSP";
- case XCoreISD::RETSP : return "XCoreISD::RETSP";
- case XCoreISD::LADD : return "XCoreISD::LADD";
- case XCoreISD::LSUB : return "XCoreISD::LSUB";
- case XCoreISD::LMUL : return "XCoreISD::LMUL";
- case XCoreISD::MACCU : return "XCoreISD::MACCU";
- case XCoreISD::MACCS : return "XCoreISD::MACCS";
- case XCoreISD::CRC8 : return "XCoreISD::CRC8";
- case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
- case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
- case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET";
- case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN";
- }
- return nullptr;
-}
-
XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
const XCoreSubtarget &Subtarget)
: TargetLowering(TM), TM(TM), Subtarget(Subtarget) {
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.h b/llvm/lib/Target/XCore/XCoreISelLowering.h
index 1e036ea316978..bad6588cad740 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -23,65 +23,6 @@ namespace llvm {
// Forward delcarations
class XCoreSubtarget;
- namespace XCoreISD {
- enum NodeType : unsigned {
- // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // Branch and link (call)
- BL,
-
- // pc relative address
- PCRelativeWrapper,
-
- // dp relative address
- DPRelativeWrapper,
-
- // cp relative address
- CPRelativeWrapper,
-
- // Load word from stack
- LDWSP,
-
- // Store word to stack
- STWSP,
-
- // Corresponds to retsp instruction
- RETSP,
-
- // Corresponds to LADD instruction
- LADD,
-
- // Corresponds to LSUB instruction
- LSUB,
-
- // Corresponds to LMUL instruction
- LMUL,
-
- // Corresponds to MACCU instruction
- MACCU,
-
- // Corresponds to MACCS instruction
- MACCS,
-
- // Corresponds to CRC8 instruction
- CRC8,
-
- // Jumptable branch.
- BR_JT,
-
- // Jumptable branch using long branches for each entry.
- BR_JT32,
-
- // Offset from frame pointer to the first (possible) on-stack argument
- FRAME_TO_ARGS_OFFSET,
-
- // Exception handler return. The stack is restored to the first
- // followed by a jump to the second argument.
- EH_RETURN,
- };
- }
-
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
@@ -109,10 +50,6 @@ namespace llvm {
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
- /// getTargetNodeName - This method returns the name of a target specific
- // DAG node.
- const char *getTargetNodeName(unsigned Opcode) const override;
-
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
diff --git a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 0d097076348ca..bc34ab4319690 100644
--- a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -10,11 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "XCoreSelectionDAGInfo.h"
#include "XCoreTargetMachine.h"
+
+#define GET_SDNODE_DESC
+#include "XCoreGenSDNodeInfo.inc"
+
using namespace llvm;
#define DEBUG_TYPE "xcore-selectiondag-info"
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo()
+ : SelectionDAGGenTargetInfo(XCoreGenSDNodeInfo) {}
+
SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
diff --git a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 2abf526779785..4a2848229aca9 100644
--- a/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -15,10 +15,15 @@
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#define GET_SDNODE_ENUM
+#include "XCoreGenSDNodeInfo.inc"
+
namespace llvm {
-class XCoreSelectionDAGInfo : public SelectionDAGTargetInfo {
+class XCoreSelectionDAGInfo : public SelectionDAGGenTargetInfo {
public:
+ XCoreSelectionDAGInfo();
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Op1, SDValue Op2,
SDValue Op3, Align Alignment, bool isVolatile,
@@ -27,6 +32,6 @@ class XCoreSelectionDAGInfo : public SelectionDAGTargetInfo {
MachinePointerInfo SrcPtrInfo) const override;
};
-}
+} // namespace llvm
#endif
>From 90d8e4de117f0cbe7eb1f69ee45c0e79d42d3667 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 14:34:22 -0700
Subject: [PATCH 056/115] [Tooling] Fix warnings
This patch fixes:
clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp:69:7:
error: 'std::lock_guard' may not intend to support class template
argument deduction [-Werror,-Wctad-maybe-unsupported]
clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp:80:7:
error: 'std::lock_guard' may not intend to support class template
argument deduction [-Werror,-Wctad-maybe-unsupported]
clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp:93:7:
error: 'std::lock_guard' may not intend to support class template
argument deduction [-Werror,-Wctad-maybe-unsupported]
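For reference, a minimal reproduction of the diagnostic (not from the patch), with -Wctad-maybe-unsupported enabled:

#include <mutex>

std::mutex M;

void locked() {
  // std::lock_guard Lock(M);          // CTAD; -Wctad-maybe-unsupported fires under -Werror
  std::lock_guard<std::mutex> Lock(M); // explicit template argument; no warning
}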
---
.../lib/Tooling/DependencyScanning/InProcessModuleCache.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
index eb05821e2bc02..80db2d47d940e 100644
--- a/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
+++ b/clang/lib/Tooling/DependencyScanning/InProcessModuleCache.cpp
@@ -66,7 +66,7 @@ class InProcessModuleCache : public ModuleCache {
std::unique_ptr<llvm::AdvisoryLock> getLock(StringRef Filename) override {
auto &CompilationMutex = [&]() -> std::shared_mutex & {
- std::lock_guard Lock(Entries.Mutex);
+ std::lock_guard<std::mutex> Lock(Entries.Mutex);
auto &Entry = Entries.Map[Filename];
if (!Entry)
Entry = std::make_unique<ModuleCacheEntry>();
@@ -77,7 +77,7 @@ class InProcessModuleCache : public ModuleCache {
std::time_t getModuleTimestamp(StringRef Filename) override {
auto &Timestamp = [&]() -> std::atomic<std::time_t> & {
- std::lock_guard Lock(Entries.Mutex);
+ std::lock_guard<std::mutex> Lock(Entries.Mutex);
auto &Entry = Entries.Map[Filename];
if (!Entry)
Entry = std::make_unique<ModuleCacheEntry>();
@@ -90,7 +90,7 @@ class InProcessModuleCache : public ModuleCache {
void updateModuleTimestamp(StringRef Filename) override {
// Note: This essentially replaces FS contention with mutex contention.
auto &Timestamp = [&]() -> std::atomic<std::time_t> & {
- std::lock_guard Lock(Entries.Mutex);
+ std::lock_guard<std::mutex> Lock(Entries.Mutex);
auto &Entry = Entries.Map[Filename];
if (!Entry)
Entry = std::make_unique<ModuleCacheEntry>();
>From 5d7e8ac53badb45f29d88b133fccb3fb8eed787a Mon Sep 17 00:00:00 2001
From: Ryosuke Niwa <rniwa at webkit.org>
Date: Wed, 7 May 2025 15:07:41 -0700
Subject: [PATCH 057/115] [webkit.UncountedLambdaCapturesChecker] Treat a copy
capture of a CheckedPtr object as safe (#138068)
Allow a by-copy capture of a reference to a CheckedPtr-capable object, since
such a capture copies the referenced object itself instead of keeping a
dangling reference to it.
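A small illustration of the C++ capture rule this relies on (hypothetical Widget type, not the checker's test types): capturing a reference by copy copies the referenced object, so nothing dangles.

#include <cstdio>

struct Widget { int Value = 0; };

int main() {
  Widget W{42};
  Widget &Ref = W;
  auto ByCopy = [Ref] { return Ref.Value; }; // stores a copy of W, not a reference
  W.Value = 7;                               // does not affect the lambda's copy
  std::printf("%d %d\n", ByCopy(), W.Value); // prints "42 7"
}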
---
.../WebKit/RawPtrRefLambdaCapturesChecker.cpp | 3 +++
.../Checkers/WebKit/uncounted-lambda-captures.cpp | 13 +++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
index 0a658b59ad8c5..01faa9217982d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/RawPtrRefLambdaCapturesChecker.cpp
@@ -381,6 +381,9 @@ class RawPtrRefLambdaCapturesChecker
}
QualType CapturedVarQualType = CapturedVar->getType();
auto IsUncountedPtr = isUnsafePtr(CapturedVar->getType());
+ if (C.getCaptureKind() == LCK_ByCopy &&
+ CapturedVarQualType->isReferenceType())
+ continue;
if (IsUncountedPtr && *IsUncountedPtr)
reportBug(C, CapturedVar, CapturedVarQualType, L);
} else if (C.capturesThis() && shouldCheckThis) {
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
index daa15d55aee5a..6b7593a821c64 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-lambda-captures.cpp
@@ -137,13 +137,11 @@ void references() {
RefCountable automatic;
RefCountable& ref_countable_ref = automatic;
auto foo1 = [ref_countable_ref](){ ref_countable_ref.constMethod(); };
- // expected-warning at -1{{Captured reference 'ref_countable_ref' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
auto foo2 = [&ref_countable_ref](){ ref_countable_ref.method(); };
// expected-warning at -1{{Captured reference 'ref_countable_ref' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
auto foo3 = [&](){ ref_countable_ref.method(); };
// expected-warning at -1{{Implicitly captured reference 'ref_countable_ref' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
auto foo4 = [=](){ ref_countable_ref.constMethod(); };
- // expected-warning at -1{{Implicitly captured reference 'ref_countable_ref' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
call(foo1);
call(foo2);
@@ -407,3 +405,14 @@ void lambda_converted_to_function(RefCountable* obj)
// expected-warning at -1{{Implicitly captured raw-pointer 'obj' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
});
}
+
+void capture_copy_in_lambda(CheckedObj& checked) {
+ callFunctionOpaque([checked]() mutable {
+ checked.method();
+ });
+ auto* ptr = &checked;
+ callFunctionOpaque([ptr]() mutable {
+ // expected-warning at -1{{Captured raw-pointer 'ptr' to uncounted type is unsafe [webkit.UncountedLambdaCapturesChecker]}}
+ ptr->method();
+ });
+}
>From 94d933676c34c9179bf55387d3f9b97e12fae94b Mon Sep 17 00:00:00 2001
From: Chengjun <chengjunp at Nvidia.com>
Date: Wed, 7 May 2025 15:25:48 -0700
Subject: [PATCH 058/115] [AA] Move Target Specific AA before BasicAA (#125965)
In this change, NVPTX AA is moved before Basic AA to potentially improve
compile time. Additionally, it introduces a flag in the
`ExternalAAWrapper` that allows other backends to run their
target-specific AA passes before Basic AA, if desired.
The change works for both New Pass Manager and Legacy Pass Manager.
Original implementation by Princeton Ferro <pferro at nvidia.com>
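A hedged sketch of how another backend could opt in (it mirrors the NVPTX wrapper below; MyTargetAAWrapperPass is hypothetical and the snippet assumes the usual LLVM headers, so it is not standalone-compilable):

#include "llvm/Analysis/AliasAnalysis.h"

using namespace llvm;

class MyTargetExternalAAWrapper : public ExternalAAWrapperPass {
public:
  static char ID;

  // Ask the AA pipeline to query this AA before BasicAA, so BasicAA does not
  // spend time recomputing facts the target already knows.
  bool runEarly() override { return true; }

  MyTargetExternalAAWrapper()
      : ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
          if (auto *WP = P.getAnalysisIfAvailable<MyTargetAAWrapperPass>())
            AAR.addAAResult(WP->getResult());
        }) {}
};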
---
llvm/include/llvm/Analysis/AliasAnalysis.h | 12 ++++++
llvm/include/llvm/Target/TargetMachine.h | 5 +++
llvm/lib/Analysis/AliasAnalysis.cpp | 37 +++++++++++++++----
llvm/lib/Passes/PassBuilderPipelines.cpp | 4 ++
llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h | 7 ++++
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 7 +---
llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 2 +-
.../NVPTXAA/NVPTXAA_before_BasicAA.ll | 17 +++++++++
8 files changed, 77 insertions(+), 14 deletions(-)
create mode 100644 llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index b3b44a50ca827..d23b81854c9ea 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -1013,6 +1013,18 @@ struct ExternalAAWrapperPass : ImmutablePass {
explicit ExternalAAWrapperPass(CallbackT CB);
+ /// Returns whether this external AA should run before Basic AA.
+ ///
+ /// By default, external AA passes are run after Basic AA. If this returns
+ /// true, the external AA will be run before Basic AA during alias analysis.
+ ///
+ /// For some targets, we prefer to run the external AA early to improve
+ /// compile time as it has more target-specific information. This is
+ /// particularly useful when the external AA can provide more precise results
+ /// than Basic AA so that Basic AA does not need to spend time recomputing
+ /// them.
+ virtual bool runEarly() { return false; }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 566e7dba6792b..906926729ed74 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -396,6 +396,11 @@ class TargetMachine {
// TODO: Populate all pass names by using <Target>PassRegistry.def.
virtual void registerPassBuilderCallbacks(PassBuilder &) {}
+ /// Allow the target to register early alias analyses (AA before BasicAA) with
+ /// the AAManager for use with the new pass manager. Only affects the
+ /// "default" AAManager.
+ virtual void registerEarlyDefaultAliasAnalyses(AAManager &) {}
+
/// Allow the target to register alias analyses with the AAManager for use
/// with the new pass manager. Only affects the "default" AAManager.
virtual void registerDefaultAliasAnalyses(AAManager &) {}
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index f4946c30de9bc..27bd179a58ede 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -739,28 +739,49 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) {
AAR.reset(
new AAResults(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F)));
+ // Add any target-specific alias analyses that should be run early.
+ auto *ExtWrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>();
+ if (ExtWrapperPass && ExtWrapperPass->runEarly() && ExtWrapperPass->CB) {
+ LLVM_DEBUG(dbgs() << "AAResults register Early ExternalAA: "
+ << ExtWrapperPass->getPassName() << "\n");
+ ExtWrapperPass->CB(*this, F, *AAR);
+ }
+
// BasicAA is always available for function analyses. Also, we add it first
// so that it can trump TBAA results when it proves MustAlias.
// FIXME: TBAA should have an explicit mode to support this and then we
// should reconsider the ordering here.
- if (!DisableBasicAA)
+ if (!DisableBasicAA) {
+ LLVM_DEBUG(dbgs() << "AAResults register BasicAA\n");
AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+ }
// Populate the results with the currently available AAs.
- if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+ if (auto *WrapperPass =
+ getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>()) {
+ LLVM_DEBUG(dbgs() << "AAResults register ScopedNoAliasAA\n");
AAR->addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+ }
+ if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>()) {
+ LLVM_DEBUG(dbgs() << "AAResults register TypeBasedAA\n");
AAR->addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+ }
+ if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>()) {
+ LLVM_DEBUG(dbgs() << "AAResults register GlobalsAA\n");
AAR->addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+ }
+ if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>()) {
+ LLVM_DEBUG(dbgs() << "AAResults register SCEVAA\n");
AAR->addAAResult(WrapperPass->getResult());
+ }
// If available, run an external AA providing callback over the results as
// well.
- if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
- if (WrapperPass->CB)
- WrapperPass->CB(*this, F, *AAR);
+ if (ExtWrapperPass && !ExtWrapperPass->runEarly() && ExtWrapperPass->CB) {
+ LLVM_DEBUG(dbgs() << "AAResults register Late ExternalAA: "
+ << ExtWrapperPass->getPassName() << "\n");
+ ExtWrapperPass->CB(*this, F, *AAR);
+ }
// Analyses don't mutate the IR, so return false.
return false;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f172271be09ab..5a85b308925a6 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -2320,6 +2320,10 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
// The order in which these are registered determines their priority when
// being queried.
+ // Add any target-specific alias analyses that should be run early.
+ if (TM)
+ TM->registerEarlyDefaultAliasAnalyses(AA);
+
// First we register the basic alias analysis that provides the majority of
// per-function local AA logic. This is a stateless, on-demand local set of
// AA techniques.
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
index a82c3aaa72423..430fcd741c1b6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -85,16 +85,23 @@ class NVPTXAAWrapperPass : public ImmutablePass {
// Wrapper around ExternalAAWrapperPass so that the default
// constructor gets the callback.
+// Note that NVPTXAA will run before BasicAA for compile time considerations.
class NVPTXExternalAAWrapper : public ExternalAAWrapperPass {
public:
static char ID;
+ bool runEarly() override { return true; }
+
NVPTXExternalAAWrapper()
: ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
if (auto *WrapperPass =
P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
AAR.addAAResult(WrapperPass->getResult());
}) {}
+
+ StringRef getPassName() const override {
+ return "NVPTX Address space based Alias Analysis Wrapper";
+ }
};
ImmutablePass *createNVPTXAAWrapperPass();
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index f2e149063764d..85d28a703a4cb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -234,7 +234,7 @@ MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo(
F, STI);
}
-void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
+void NVPTXTargetMachine::registerEarlyDefaultAliasAnalyses(AAManager &AAM) {
AAM.registerFunctionAnalysis<NVPTXAA>();
}
@@ -349,10 +349,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&RemoveLoadsIntoFakeUsesID);
addPass(createNVPTXAAWrapperPass());
- addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
- if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- }));
+ addPass(createNVPTXExternalAAWrapperPass());
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 34d841cd28404..118a01a0352f5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -64,7 +64,7 @@ class NVPTXTargetMachine : public CodeGenTargetMachineImpl {
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
- void registerDefaultAliasAnalyses(AAManager &AAM) override;
+ void registerEarlyDefaultAliasAnalyses(AAManager &AAM) override;
void registerPassBuilderCallbacks(PassBuilder &PB) override;
diff --git a/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll b/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
new file mode 100644
index 0000000000000..0d16b344e9f66
--- /dev/null
+++ b/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
@@ -0,0 +1,17 @@
+; REQUIRES: asserts
+; RUN: opt -aa-pipeline=default -passes='require<aa>' -debug-pass-manager -disable-output -S < %s 2>&1 | FileCheck %s
+; RUN: llc --debug-only='aa' -o /dev/null %s 2>&1 | FileCheck %s -check-prefix=LEGACY
+
+; In default AA pipeline, NVPTXAA should run before BasicAA to reduce compile time for NVPTX backend
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK: Running analysis: NVPTXAA on foo
+; CHECK-NEXT: Running analysis: BasicAA on foo
+
+; LEGACY: AAResults register Early ExternalAA: NVPTX Address space based Alias Analysis Wrapper
+; LEGACY-NEXT: AAResults register BasicAA
+define void @foo(){
+entry:
+ ret void
+}
+
>From 1d073fd1ca07b1ecf8eb95aa647692d33626ae03 Mon Sep 17 00:00:00 2001
From: PiJoules <6019989+PiJoules at users.noreply.github.com>
Date: Wed, 7 May 2025 15:42:51 -0700
Subject: [PATCH 059/115] [lsan][Fuchsia] Define EarlySanitizerInit for
standalone lsan (#138946)
I forgot to add this definition in https://github.com/llvm/llvm-project/pull/131886.
---
compiler-rt/lib/lsan/lsan_fuchsia.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/compiler-rt/lib/lsan/lsan_fuchsia.cpp b/compiler-rt/lib/lsan/lsan_fuchsia.cpp
index ba59bc9b71e33..bb5de89b56729 100644
--- a/compiler-rt/lib/lsan/lsan_fuchsia.cpp
+++ b/compiler-rt/lib/lsan/lsan_fuchsia.cpp
@@ -21,6 +21,11 @@
using namespace __lsan;
+namespace __sanitizer {
+// LSan doesn't need to do anything else special in the startup hook.
+void EarlySanitizerInit() {}
+} // namespace __sanitizer
+
namespace __lsan {
void LsanOnDeadlySignal(int signo, void *siginfo, void *context) {}
>From 63fcce6611483658e310741b49460ff6350e9bc0 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Wed, 7 May 2025 15:51:19 -0700
Subject: [PATCH 060/115] [IA][RISCV] Add support for vp.load/vp.store with
shufflevector (#135445)
Teach InterleavedAccessPass to recognize vp.load + shufflevector and
shufflevector + vp.store, though this patch only adds RISC-V support for
actually lowering this pattern. The vp.load/vp.store in this pattern
require a constant mask.
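As a side note (not part of the patch), the re-interleave mask split that the pass performs for vp.store can be seen in isolation with a factor-2 example; field i, lane j comes from wide-mask position i + Factor * j:

#include <cstdio>
#include <vector>

int main() {
  const unsigned Factor = 2, LaneMaskLen = 4;
  // Wide re-interleave mask producing a0 b0 a1 b1 a2 b2 a3 b3 from two
  // 4-element vectors a (elements 0..3) and b (elements 4..7).
  std::vector<int> WideMask = {0, 4, 1, 5, 2, 6, 3, 7};
  for (unsigned I = 0; I < Factor; ++I) {
    std::printf("field %u:", I);
    for (unsigned J = 0; J < LaneMaskLen; ++J)
      std::printf(" %d", WideMask[I + Factor * J]); // field 0 -> 0 1 2 3, field 1 -> 4 5 6 7
    std::printf("\n");
  }
  return 0;
}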
---
llvm/include/llvm/CodeGen/TargetLowering.h | 8 +-
llvm/include/llvm/IR/IntrinsicsRISCV.td | 20 +
llvm/lib/CodeGen/InterleavedAccessPass.cpp | 206 ++++--
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 235 +++++--
llvm/lib/Target/RISCV/RISCVISelLowering.h | 10 +-
.../rvv/fixed-vectors-interleaved-access.ll | 601 +++++++++++++++++-
.../RISCV/rvv/vp-vector-interleaved-access.ll | 53 --
7 files changed, 920 insertions(+), 213 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index abe261728a3e6..03099e9ad44dc 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3216,8 +3216,7 @@ class TargetLoweringBase {
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
/// \p DeinterleaveRes is a list of deinterleaved results.
- virtual bool
- lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
+ virtual bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveRes) const {
return false;
}
@@ -3228,9 +3227,8 @@ class TargetLoweringBase {
/// \p Store is the vp.store instruction.
/// \p Mask is a mask value
/// \p InterleaveOps is a list of values being interleaved.
- virtual bool
- lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const {
+ virtual bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveOps) const {
return false;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99cb557d9aa09..7da11b93f6b74 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1705,12 +1705,23 @@ let TargetPrefix = "riscv" in {
// Segment loads/stores for fixed vectors.
foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
+ // Input: (pointer, vl)
def int_riscv_seg # nf # _load
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>,
!add(nf, -1))),
[llvm_anyptr_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>;
+ // Input: (pointer, mask, vl)
+ def int_riscv_seg # nf # _load_mask
+ : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>,
+ !add(nf, -1))),
+ [llvm_ptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty],
+ [NoCapture<ArgIndex<0>>, IntrReadMem]>;
+
+ // Input: (<stored values>, pointer, vl)
def int_riscv_seg # nf # _store
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
@@ -1718,6 +1729,15 @@ let TargetPrefix = "riscv" in {
!add(nf, -1)),
[llvm_anyptr_ty, llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
+ // Input: (<stored values>, pointer, mask, vl)
+ def int_riscv_seg # nf # _store_mask
+ : DefaultAttrsIntrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>,
+ !add(nf, -1)),
+ [llvm_ptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
}
} // TargetPrefix = "riscv"
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 73f41c05711b7..04d89d61cb6a9 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -100,11 +100,11 @@ class InterleavedAccessImpl {
unsigned MaxFactor = 0u;
/// Transform an interleaved load into target specific intrinsics.
- bool lowerInterleavedLoad(LoadInst *LI,
+ bool lowerInterleavedLoad(Instruction *Load,
SmallSetVector<Instruction *, 32> &DeadInsts);
/// Transform an interleaved store into target specific intrinsics.
- bool lowerInterleavedStore(StoreInst *SI,
+ bool lowerInterleavedStore(Instruction *Store,
SmallSetVector<Instruction *, 32> &DeadInsts);
/// Transform a load and a deinterleave intrinsic into target specific
@@ -131,7 +131,7 @@ class InterleavedAccessImpl {
/// made.
bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
- LoadInst *LI);
+ Instruction *LI);
};
class InterleavedAccess : public FunctionPass {
@@ -249,11 +249,33 @@ static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
return false;
}
+// Return the corresponded deinterleaved mask, or nullptr if there is no valid
+// mask.
+static Value *getMask(Value *WideMask, unsigned Factor,
+ ElementCount LeafValueEC);
+
+static Value *getMask(Value *WideMask, unsigned Factor,
+ VectorType *LeafValueTy) {
+ return getMask(WideMask, Factor, LeafValueTy->getElementCount());
+}
+
bool InterleavedAccessImpl::lowerInterleavedLoad(
- LoadInst *LI, SmallSetVector<Instruction *, 32> &DeadInsts) {
- if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
+ Instruction *Load, SmallSetVector<Instruction *, 32> &DeadInsts) {
+ if (isa<ScalableVectorType>(Load->getType()))
return false;
+ if (auto *LI = dyn_cast<LoadInst>(Load)) {
+ if (!LI->isSimple())
+ return false;
+ } else if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
+ assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load);
+ // Require a constant mask.
+ if (!isa<ConstantVector>(VPLoad->getMaskParam()))
+ return false;
+ } else {
+ llvm_unreachable("unsupported load operation");
+ }
+
// Check if all users of this load are shufflevectors. If we encounter any
// users that are extractelement instructions or binary operators, we save
// them to later check if they can be modified to extract from one of the
@@ -265,7 +287,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
// binop are the same load.
SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
- for (auto *User : LI->users()) {
+ for (auto *User : Load->users()) {
auto *Extract = dyn_cast<ExtractElementInst>(User);
if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
Extracts.push_back(Extract);
@@ -294,7 +316,7 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
unsigned Factor, Index;
unsigned NumLoadElements =
- cast<FixedVectorType>(LI->getType())->getNumElements();
+ cast<FixedVectorType>(Load->getType())->getNumElements();
auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
@@ -327,9 +349,9 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
- if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == Load)
Indices.push_back(Index);
- if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
+ if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == Load)
Indices.push_back(Index);
}
@@ -339,25 +361,45 @@ bool InterleavedAccessImpl::lowerInterleavedLoad(
return false;
bool BinOpShuffleChanged =
- replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
+ replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, Load);
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+ if (auto *VPLoad = dyn_cast<VPIntrinsic>(Load)) {
+ Value *LaneMask =
+ getMask(VPLoad->getMaskParam(), Factor, cast<VectorType>(VecTy));
+ if (!LaneMask)
+ return false;
- // Try to create target specific intrinsics to replace the load and shuffles.
- if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
- // If Extracts is not empty, tryReplaceExtracts made changes earlier.
- return !Extracts.empty() || BinOpShuffleChanged;
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.load: " << *Load << "\n");
+
+ // Sometimes the number of Shuffles might be less than Factor, we have to
+ // fill the gaps with null. Also, lowerInterleavedVPLoad
+ // expects them to be sorted.
+ SmallVector<Value *, 4> ShuffleValues(Factor, nullptr);
+ for (auto [Idx, ShuffleMaskIdx] : enumerate(Indices))
+ ShuffleValues[ShuffleMaskIdx] = Shuffles[Idx];
+ if (!TLI->lowerInterleavedVPLoad(VPLoad, LaneMask, ShuffleValues))
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
+ } else {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *Load << "\n");
+
+ // Try to create target specific intrinsics to replace the load and
+ // shuffles.
+ if (!TLI->lowerInterleavedLoad(cast<LoadInst>(Load), Shuffles, Indices,
+ Factor))
+ // If Extracts is not empty, tryReplaceExtracts made changes earlier.
+ return !Extracts.empty() || BinOpShuffleChanged;
}
DeadInsts.insert_range(Shuffles);
- DeadInsts.insert(LI);
+ DeadInsts.insert(Load);
return true;
}
bool InterleavedAccessImpl::replaceBinOpShuffles(
ArrayRef<ShuffleVectorInst *> BinOpShuffles,
- SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
+ SmallVectorImpl<ShuffleVectorInst *> &Shuffles, Instruction *Load) {
for (auto *SVI : BinOpShuffles) {
BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
Type *BIOp0Ty = BI->getOperand(0)->getType();
@@ -380,9 +422,9 @@ bool InterleavedAccessImpl::replaceBinOpShuffles(
<< "\n With : " << *NewSVI1 << "\n And : "
<< *NewSVI2 << "\n And : " << *NewBI << "\n");
RecursivelyDeleteTriviallyDeadInstructions(SVI);
- if (NewSVI1->getOperand(0) == LI)
+ if (NewSVI1->getOperand(0) == Load)
Shuffles.push_back(NewSVI1);
- if (NewSVI2->getOperand(0) == LI)
+ if (NewSVI2->getOperand(0) == Load)
Shuffles.push_back(NewSVI2);
}
@@ -454,27 +496,77 @@ bool InterleavedAccessImpl::tryReplaceExtracts(
}
bool InterleavedAccessImpl::lowerInterleavedStore(
- StoreInst *SI, SmallSetVector<Instruction *, 32> &DeadInsts) {
- if (!SI->isSimple())
- return false;
+ Instruction *Store, SmallSetVector<Instruction *, 32> &DeadInsts) {
+ Value *StoredValue;
+ if (auto *SI = dyn_cast<StoreInst>(Store)) {
+ if (!SI->isSimple())
+ return false;
+ StoredValue = SI->getValueOperand();
+ } else if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+ assert(VPStore->getIntrinsicID() == Intrinsic::vp_store);
+ // Require a constant mask.
+ if (!isa<ConstantVector>(VPStore->getMaskParam()))
+ return false;
+ StoredValue = VPStore->getArgOperand(0);
+ } else {
+ llvm_unreachable("unsupported store operation");
+ }
- auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ auto *SVI = dyn_cast<ShuffleVectorInst>(StoredValue);
if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
return false;
+ unsigned NumStoredElements =
+ cast<FixedVectorType>(SVI->getType())->getNumElements();
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
+ assert(NumStoredElements % Factor == 0 &&
+         "number of stored elements should be a multiple of Factor");
+
+ if (auto *VPStore = dyn_cast<VPIntrinsic>(Store)) {
+ unsigned LaneMaskLen = NumStoredElements / Factor;
+ Value *LaneMask = getMask(VPStore->getMaskParam(), Factor,
+ ElementCount::getFixed(LaneMaskLen));
+ if (!LaneMask)
+ return false;
- LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store: " << *Store
+ << "\n");
- // Try to create target specific intrinsics to replace the store and shuffle.
- if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
- return false;
+ IRBuilder<> Builder(VPStore);
+    // We need to effectively de-interleave the shuffle mask because
+    // lowerInterleavedVPStore expects individual de-interleaved values.
+ SmallVector<Value *, 10> NewShuffles;
+ SmallVector<int, 16> NewShuffleMask(LaneMaskLen);
+ auto ShuffleMask = SVI->getShuffleMask();
+
+ for (unsigned i = 0; i < Factor; i++) {
+ for (unsigned j = 0; j < LaneMaskLen; j++)
+ NewShuffleMask[j] = ShuffleMask[i + Factor * j];
+
+ NewShuffles.push_back(Builder.CreateShuffleVector(
+ SVI->getOperand(0), SVI->getOperand(1), NewShuffleMask));
+ }
+
+ // Try to create target specific intrinsics to replace the vp.store and
+ // shuffle.
+ if (!TLI->lowerInterleavedVPStore(VPStore, LaneMask, NewShuffles))
+ // We already created new shuffles.
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
+
+ // Try to create target specific intrinsics to replace the store and
+ // shuffle.
+ if (!TLI->lowerInterleavedStore(cast<StoreInst>(Store), SVI, Factor))
+ return false;
+ }
// Already have a new target specific interleaved store. Erase the old store.
- DeadInsts.insert(SI);
+ DeadInsts.insert(Store);
DeadInsts.insert(SVI);
return true;
}
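
The loop above rebuilds one shuffle per field by walking the re-interleave mask with stride Factor, so field i gathers mask elements i, i + Factor, i + 2*Factor, and so on. A standalone C++ sketch of that index arithmetic, not part of the patch; the factor-3 mask below is a made-up example:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical factor-3 re-interleave mask over 4 lanes: elements 0..3 come
  // from field 0, 4..7 from field 1, 8..11 from field 2.
  const unsigned Factor = 3, LaneMaskLen = 4;
  std::vector<int> ShuffleMask = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};

  for (unsigned I = 0; I < Factor; ++I) {
    std::printf("field %u mask:", I);
    for (unsigned J = 0; J < LaneMaskLen; ++J)
      std::printf(" %d", ShuffleMask[I + Factor * J]); // stride-Factor walk
    std::printf("\n");
  }
  return 0;
}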
@@ -630,10 +722,8 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
return true;
}
-// Return the corresponded deinterleaved mask, or nullptr if there is no valid
-// mask.
-static Value *getMask(Value *WideMask, unsigned Factor) {
- using namespace llvm::PatternMatch;
+static Value *getMask(Value *WideMask, unsigned Factor,
+ ElementCount LeafValueEC) {
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
SmallVector<Value *, 8> Operands;
SmallVector<Instruction *, 8> DeadInsts;
@@ -644,13 +734,26 @@ static Value *getMask(Value *WideMask, unsigned Factor) {
}
}
- if (match(WideMask, m_AllOnes())) {
- // Scale the vector length of all-ones mask.
- ElementCount OrigEC =
- cast<VectorType>(WideMask->getType())->getElementCount();
- assert(OrigEC.getKnownMinValue() % Factor == 0);
- return ConstantVector::getSplat(OrigEC.divideCoefficientBy(Factor),
- cast<Constant>(WideMask)->getSplatValue());
+ if (auto *ConstMask = dyn_cast<Constant>(WideMask)) {
+ if (auto *Splat = ConstMask->getSplatValue())
+ // All-ones or all-zeros mask.
+ return ConstantVector::getSplat(LeafValueEC, Splat);
+
+ if (LeafValueEC.isFixed()) {
+ unsigned LeafMaskLen = LeafValueEC.getFixedValue();
+ SmallVector<Constant *, 8> LeafMask(LeafMaskLen, nullptr);
+      // If this is a fixed-length constant mask, each lane / leaf has to use
+      // the same mask value. This is checked by requiring every group of
+      // Factor consecutive elements in the interleaved mask to be homogeneous.
+ for (unsigned Idx = 0U; Idx < LeafMaskLen * Factor; ++Idx) {
+ Constant *C = ConstMask->getAggregateElement(Idx);
+ if (LeafMask[Idx / Factor] && LeafMask[Idx / Factor] != C)
+ return nullptr;
+ LeafMask[Idx / Factor] = C;
+ }
+
+ return ConstantVector::get(LeafMask);
+ }
}
return nullptr;
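
The new fixed-length branch accepts a constant mask only when it deinterleaves cleanly: every group of Factor consecutive elements must carry the same value, and that shared value becomes one lane of the leaf mask. A standalone C++ sketch of the homogeneity check, not part of the patch; the factor-3 mask is a made-up example:

#include <cstdio>
#include <optional>
#include <vector>

int main() {
  // Hypothetical wide mask for a factor-3 interleave of 4 lanes: lanes 1 and 3
  // are active, lanes 0 and 2 are inactive.
  const unsigned Factor = 3, LeafMaskLen = 4;
  std::vector<int> WideMask = {0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1};

  std::vector<std::optional<int>> LeafMask(LeafMaskLen);
  for (unsigned Idx = 0; Idx < LeafMaskLen * Factor; ++Idx) {
    int C = WideMask[Idx];
    unsigned Lane = Idx / Factor;
    if (LeafMask[Lane] && *LeafMask[Lane] != C) {
      std::printf("group %u is not homogeneous -> no deinterleaved mask\n",
                  Lane);
      return 0;
    }
    LeafMask[Lane] = C;
  }
  std::printf("leaf mask:");
  for (const auto &B : LeafMask)
    std::printf(" %d", *B);
  std::printf("\n"); // prints: leaf mask: 0 1 0 1
  return 0;
}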
@@ -673,9 +776,10 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
- // Check mask operand. Handle both all-true and interleaved mask.
+ // Check mask operand. Handle both all-true/false and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
- Value *Mask = getMask(WideMask, Factor);
+ Value *Mask = getMask(WideMask, Factor,
+ cast<VectorType>(DeinterleaveValues[0]->getType()));
if (!Mask)
return false;
@@ -684,8 +788,7 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
// TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
- DeinterleaveValues))
+ if (!TLI->lowerInterleavedVPLoad(VPLoad, Mask, DeinterleaveValues))
return false;
} else {
@@ -727,7 +830,8 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
return false;
Value *WideMask = VPStore->getOperand(2);
- Value *Mask = getMask(WideMask, Factor);
+ Value *Mask = getMask(WideMask, Factor,
+ cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;
@@ -736,8 +840,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
// Since lowerInterleavedStore expects Shuffle and StoreInst, use special
// TLI function to emit target-specific interleaved instruction.
- if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
- InterleaveValues))
+ if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
return false;
} else {
auto *SI = cast<StoreInst>(StoredBy);
@@ -763,12 +866,15 @@ bool InterleavedAccessImpl::runOnFunction(Function &F) {
SmallSetVector<Instruction *, 32> DeadInsts;
bool Changed = false;
+ using namespace PatternMatch;
for (auto &I : instructions(F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I))
- Changed |= lowerInterleavedLoad(LI, DeadInsts);
+ if (match(&I, m_CombineOr(m_Load(m_Value()),
+ m_Intrinsic<Intrinsic::vp_load>())))
+ Changed |= lowerInterleavedLoad(&I, DeadInsts);
- if (auto *SI = dyn_cast<StoreInst>(&I))
- Changed |= lowerInterleavedStore(SI, DeadInsts);
+ if (match(&I, m_CombineOr(m_Store(m_Value(), m_Value()),
+ m_Intrinsic<Intrinsic::vp_store>())))
+ Changed |= lowerInterleavedStore(&I, DeadInsts);
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
// At present, we only have intrinsics to represent (de)interleaving
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 756c563f0194d..6319d0ec8447f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1752,6 +1752,13 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_seg6_load:
case Intrinsic::riscv_seg7_load:
case Intrinsic::riscv_seg8_load:
+ case Intrinsic::riscv_seg2_load_mask:
+ case Intrinsic::riscv_seg3_load_mask:
+ case Intrinsic::riscv_seg4_load_mask:
+ case Intrinsic::riscv_seg5_load_mask:
+ case Intrinsic::riscv_seg6_load_mask:
+ case Intrinsic::riscv_seg7_load_mask:
+ case Intrinsic::riscv_seg8_load_mask:
return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_seg2_store:
@@ -1765,6 +1772,17 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
/*IsStore*/ true,
/*IsUnitStrided*/ false, /*UsePtrVal*/ true);
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask:
+ // Operands are (vec, ..., vec, ptr, mask, vl)
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
case Intrinsic::riscv_vle:
case Intrinsic::riscv_vle_mask:
case Intrinsic::riscv_vleff:
@@ -10579,13 +10597,20 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::riscv_seg5_load:
case Intrinsic::riscv_seg6_load:
case Intrinsic::riscv_seg7_load:
- case Intrinsic::riscv_seg8_load: {
+ case Intrinsic::riscv_seg8_load:
+ case Intrinsic::riscv_seg2_load_mask:
+ case Intrinsic::riscv_seg3_load_mask:
+ case Intrinsic::riscv_seg4_load_mask:
+ case Intrinsic::riscv_seg5_load_mask:
+ case Intrinsic::riscv_seg6_load_mask:
+ case Intrinsic::riscv_seg7_load_mask:
+ case Intrinsic::riscv_seg8_load_mask: {
SDLoc DL(Op);
static const Intrinsic::ID VlsegInts[7] = {
- Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
- Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
- Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
- Intrinsic::riscv_vlseg8};
+ Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+ Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+ Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+ Intrinsic::riscv_vlseg8_mask};
unsigned NF = Op->getNumValues() - 1;
assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
MVT XLenVT = Subtarget.getXLenVT();
@@ -10595,7 +10620,19 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
ContainerVT.getScalarSizeInBits();
EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
- SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ // Masked: (pointer, mask, vl)
+ // Non-masked: (pointer, vl)
+ bool IsMasked = Op.getNumOperands() > 4;
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask =
+ IsMasked ? Op.getOperand(3) : getAllOnesMask(ContainerVT, VL, DL, DAG);
+ MVT MaskVT = Mask.getSimpleValueType();
+ if (MaskVT.isFixedLengthVector()) {
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ }
+
SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
auto *Load = cast<MemIntrinsicSDNode>(Op);
@@ -10605,7 +10642,10 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
IntID,
DAG.getUNDEF(VecTupTy),
Op.getOperand(2),
+ Mask,
VL,
+ DAG.getTargetConstant(
+ RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
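
For the segment-load node, the masked and unmasked forms are distinguished purely by operand count, as the comment notes. A standalone C++ sketch of that dispatch, not part of the patch, assuming the operand layouts stated in the comments:

#include <cstdio>

int main() {
  // Hypothetical operand counts: (chain, int_id, ptr, vl) for the unmasked
  // form, (chain, int_id, ptr, mask, vl) for the masked one.
  const unsigned Counts[] = {4, 5};
  for (unsigned NumOperands : Counts) {
    bool IsMasked = NumOperands > 4;
    unsigned VLIdx = NumOperands - 1; // vl is always the last operand
    std::printf("%u operands: %s, vl=op%u%s\n", NumOperands,
                IsMasked ? "masked" : "unmasked (build an all-ones mask)",
                VLIdx, IsMasked ? ", mask=op3" : "");
  }
  return 0;
}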
@@ -10665,15 +10705,39 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::riscv_seg5_store:
case Intrinsic::riscv_seg6_store:
case Intrinsic::riscv_seg7_store:
- case Intrinsic::riscv_seg8_store: {
+ case Intrinsic::riscv_seg8_store:
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask: {
SDLoc DL(Op);
static const Intrinsic::ID VssegInts[] = {
- Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
- Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
- Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
- Intrinsic::riscv_vsseg8};
- // Operands are (chain, int_id, vec*, ptr, vl)
- unsigned NF = Op->getNumOperands() - 4;
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask};
+
+ bool IsMasked = false;
+ switch (IntNo) {
+ case Intrinsic::riscv_seg2_store_mask:
+ case Intrinsic::riscv_seg3_store_mask:
+ case Intrinsic::riscv_seg4_store_mask:
+ case Intrinsic::riscv_seg5_store_mask:
+ case Intrinsic::riscv_seg6_store_mask:
+ case Intrinsic::riscv_seg7_store_mask:
+ case Intrinsic::riscv_seg8_store_mask:
+ IsMasked = true;
+ break;
+ default:
+ break;
+ }
+
+ // Non-masked: (chain, int_id, vec*, ptr, vl)
+ // Masked: (chain, int_id, vec*, ptr, mask, vl)
+ unsigned NF = Op->getNumOperands() - (IsMasked ? 5 : 4);
assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op->getOperand(2).getSimpleValueType();
@@ -10682,7 +10746,16 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
ContainerVT.getScalarSizeInBits();
EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
- SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
+ SDValue Mask = IsMasked ? Op.getOperand(Op.getNumOperands() - 2)
+ : getAllOnesMask(ContainerVT, VL, DL, DAG);
+ MVT MaskVT = Mask.getSimpleValueType();
+ if (MaskVT.isFixedLengthVector()) {
+ MVT MaskContainerVT =
+ ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ }
+
SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
SDValue Ptr = Op->getOperand(NF + 2);
@@ -10701,6 +10774,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
IntID,
StoredVal,
Ptr,
+ Mask,
VL,
DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
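
The store side does the same operand-count bookkeeping, with the stored vectors in front: NF is whatever remains after peeling off chain, intrinsic id, pointer, vl and, when masked, the mask. A standalone C++ sketch of that arithmetic, not part of the patch, assuming the operand layouts stated in the comments:

#include <cstdio>

// Hypothetical helper mirroring the NF / mask / vl bookkeeping above.
void describe(unsigned NumOperands, bool IsMasked) {
  unsigned NF = NumOperands - (IsMasked ? 5 : 4); // number of stored vectors
  unsigned PtrIdx = NF + 2;                       // after chain, id, vectors
  unsigned VLIdx = NumOperands - 1;               // vl is always last
  std::printf("NF=%u ptr=op%u vl=op%u", NF, PtrIdx, VLIdx);
  if (IsMasked)
    std::printf(" mask=op%u", NumOperands - 2);
  std::printf("\n");
}

int main() {
  describe(/*chain,id,3 vecs,ptr,vl*/ 7, false);     // NF=3
  describe(/*chain,id,3 vecs,ptr,mask,vl*/ 8, true); // NF=3
  return 0;
}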
@@ -24043,15 +24117,20 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
if (N == 1)
return true;
+ using namespace PatternMatch;
+ // Right now we're only recognizing the simplest pattern.
+ uint64_t C;
+ if (match(V, m_CombineOr(m_ConstantInt(C),
+ m_c_Mul(m_Value(), m_ConstantInt(C)))) &&
+ C && C % N == 0)
+ return true;
+
if (isPowerOf2_32(N)) {
KnownBits KB = llvm::computeKnownBits(V, DL);
return KB.countMinTrailingZeros() >= Log2_32(N);
}
- using namespace PatternMatch;
- // Right now we're only recognizing the simplest pattern.
- uint64_t C;
- return match(V, m_c_Mul(m_Value(), m_ConstantInt(C))) && C && C % N == 0;
+ return false;
}
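
The reordered isMultipleOfN now tries the cheap syntactic matches first (a plain constant, or a multiply by a constant, with the constant divisible by N) and only falls back to known bits for power-of-two N. A standalone C++ sketch of that decision order on plain integers, not part of the patch; the struct fields stand in for the pattern-match results and the known-bits query:

#include <cstdio>
#include <optional>

// Hypothetical stand-ins for what the IR-level checks would report.
struct EVLInfo {
  std::optional<unsigned long long> ConstVal; // value, if EVL is a constant
  std::optional<unsigned long long> MulConst; // C, if EVL is (mul x, C)
  unsigned MinTrailingZeros = 0;              // from the known-bits fallback
};

static bool isMultipleOfN(const EVLInfo &V, unsigned N) {
  if (N == 1)
    return true;
  // Syntactic matches first, mirroring the reordered checks above.
  if (V.ConstVal && *V.ConstVal != 0 && *V.ConstVal % N == 0)
    return true;
  if (V.MulConst && *V.MulConst != 0 && *V.MulConst % N == 0)
    return true;
  if ((N & (N - 1)) == 0) { // power of two: known trailing zeros suffice
    unsigned Log2N = 0;
    while ((1u << Log2N) < N)
      ++Log2N;
    return V.MinTrailingZeros >= Log2N;
  }
  return false;
}

int main() {
  EVLInfo A, B, C;
  A.ConstVal = 12;        // EVL is the constant 12
  B.MulConst = 4;         // EVL is (mul x, 4)
  C.MinTrailingZeros = 1; // only one known low zero bit
  std::printf("%d %d %d\n", isMultipleOfN(A, 3), isMultipleOfN(B, 4),
              isMultipleOfN(C, 4)); // prints: 1 1 0
  return 0;
}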
/// Lower an interleaved vp.load into a vlsegN intrinsic.
@@ -24083,7 +24162,7 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
/// TODO: We probably can loosen the dependency on matching extractvalue when
/// dealing with factor of 2 (extractvalue is still required for most of other
/// factors though).
-bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
+bool RISCVTargetLowering::lowerInterleavedVPLoad(
VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveResults) const {
assert(Mask && "Expect a valid mask");
@@ -24092,27 +24171,21 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
const unsigned Factor = DeinterleaveResults.size();
- auto *WideVTy = dyn_cast<ScalableVectorType>(Load->getType());
- // TODO: Support fixed vectors.
- if (!WideVTy)
+ auto *VTy = dyn_cast<VectorType>(DeinterleaveResults[0]->getType());
+ if (!VTy)
return false;
- unsigned WideNumElements = WideVTy->getElementCount().getKnownMinValue();
- assert(WideNumElements % Factor == 0 &&
- "ElementCount of a wide load must be divisible by interleave factor");
- auto *VTy =
- VectorType::get(WideVTy->getScalarType(), WideNumElements / Factor,
- WideVTy->isScalableTy());
auto &DL = Load->getModule()->getDataLayout();
Align Alignment = Load->getParamAlign(0).value_or(
- DL.getABITypeAlign(WideVTy->getElementType()));
+ DL.getABITypeAlign(VTy->getElementType()));
if (!isLegalInterleavedAccessType(
VTy, Factor, Alignment,
Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
return false;
IRBuilder<> Builder(Load);
- Value *WideEVL = Load->getArgOperand(2);
+
+ Value *WideEVL = Load->getVectorLengthParam();
// Conservatively check if EVL is a multiple of factor, otherwise some
// (trailing) elements might be lost after the transformation.
if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
@@ -24123,49 +24196,64 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
XLenTy);
- static const Intrinsic::ID IntrMaskIds[] = {
- Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
- Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
- Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
- Intrinsic::riscv_vlseg8_mask,
- };
+ Value *Return = nullptr;
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ static const Intrinsic::ID FixedMaskedVlsegIntrIds[] = {
+ Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask,
+ Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask,
+ Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
+ Intrinsic::riscv_seg8_load_mask};
+
+ Return = Builder.CreateIntrinsic(FixedMaskedVlsegIntrIds[Factor - 2],
+ {FVTy, XLenTy},
+ {Load->getArgOperand(0), Mask, EVL});
+ } else {
+ static const Intrinsic::ID IntrMaskIds[] = {
+ Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+ Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+ Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+ Intrinsic::riscv_vlseg8_mask,
+ };
- unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
- unsigned NumElts = VTy->getElementCount().getKnownMinValue();
- Type *VecTupTy = TargetExtType::get(
- Load->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
- NumElts * SEW / 8),
- Factor);
+ unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
+ unsigned NumElts = VTy->getElementCount().getKnownMinValue();
+ Type *VecTupTy = TargetExtType::get(
+ Load->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
+ NumElts * SEW / 8),
+ Factor);
- Value *PoisonVal = PoisonValue::get(VecTupTy);
+ Value *PoisonVal = PoisonValue::get(VecTupTy);
- Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
- Load->getModule(), IntrMaskIds[Factor - 2],
- {VecTupTy, Mask->getType(), EVL->getType()});
+ Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
+ Load->getModule(), IntrMaskIds[Factor - 2],
+ {VecTupTy, Mask->getType(), EVL->getType()});
- Value *Operands[] = {PoisonVal,
- Load->getArgOperand(0),
- Mask,
- EVL,
- ConstantInt::get(XLenTy, RISCVVType::TAIL_AGNOSTIC |
- RISCVVType::MASK_AGNOSTIC),
- ConstantInt::get(XLenTy, Log2_64(SEW))};
+ Value *Operands[] = {
+ PoisonVal,
+ Load->getArgOperand(0),
+ Mask,
+ EVL,
+ ConstantInt::get(XLenTy,
+ RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
+ ConstantInt::get(XLenTy, Log2_64(SEW))};
- CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
+ CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
- SmallVector<Type *, 8> AggrTypes{Factor, VTy};
- Value *Return =
- PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
- Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
- Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
- for (unsigned i = 0; i < Factor; ++i) {
- Value *VecExtract =
- Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
- Return = Builder.CreateInsertValue(Return, VecExtract, i);
+ SmallVector<Type *, 8> AggrTypes{Factor, VTy};
+ Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
+ Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
+ Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
+ for (unsigned i = 0; i < Factor; ++i) {
+ Value *VecExtract =
+ Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
+ Return = Builder.CreateInsertValue(Return, VecExtract, i);
+ }
}
for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
+ if (!DIO)
+ continue;
// We have to create a brand new ExtractValue to replace each
// of these old ExtractValue instructions.
Value *NewEV =
@@ -24196,7 +24284,7 @@ bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
/// <vscale x 32 x i8> %load2, ptr %ptr,
/// %mask,
/// i64 %rvl)
-bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
+bool RISCVTargetLowering::lowerInterleavedVPStore(
VPIntrinsic *Store, Value *Mask,
ArrayRef<Value *> InterleaveOperands) const {
assert(Mask && "Expect a valid mask");
@@ -24205,8 +24293,7 @@ bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
const unsigned Factor = InterleaveOperands.size();
- auto *VTy = dyn_cast<ScalableVectorType>(InterleaveOperands[0]->getType());
- // TODO: Support fixed vectors.
+ auto *VTy = dyn_cast<VectorType>(InterleaveOperands[0]->getType());
if (!VTy)
return false;
@@ -24230,6 +24317,20 @@ bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
XLenTy);
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ static const Intrinsic::ID FixedMaskedVssegIntrIds[] = {
+ Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
+ Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
+ Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
+ Intrinsic::riscv_seg8_store_mask};
+
+ SmallVector<Value *, 8> Operands(InterleaveOperands);
+ Operands.append({Store->getArgOperand(1), Mask, EVL});
+ Builder.CreateIntrinsic(FixedMaskedVssegIntrIds[Factor - 2], {FVTy, XLenTy},
+ Operands);
+ return true;
+ }
+
static const Intrinsic::ID IntrMaskIds[] = {
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f1fce5d9f7e5..65d433e045423 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -936,13 +936,11 @@ class RISCVTargetLowering : public TargetLowering {
bool lowerInterleaveIntrinsicToStore(
StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
- bool lowerDeinterleavedIntrinsicToVPLoad(
- VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveRes) const override;
+ bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
+ ArrayRef<Value *> DeinterleaveRes) const override;
- bool lowerInterleavedIntrinsicToVPStore(
- VPIntrinsic *Store, Value *Mask,
- ArrayRef<Value *> InterleaveOps) const override;
+ bool lowerInterleavedVPStore(VPIntrinsic *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveOps) const override;
bool supportKCFIBundles() const override { return true; }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index f6bdd45330384..8ac4c7447c7d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -176,6 +176,241 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i
ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
}
+define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
+; CHECK-LABEL: vpload_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8)
+ %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ ret {<4 x i32>, <4 x i32>} %res1
+}
+
+
+define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
+; CHECK-LABEL: vpload_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
+
+; We only extract some of the fields.
+define {<4 x i32>, <4 x i32>} @vpload_factor3_partial(ptr %ptr) {
+; CHECK-LABEL: vpload_factor3_partial:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vmv1r.v v8, v7
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>} poison, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v2, 1
+ ret {<4 x i32>, <4 x i32>} %res1
+}
+
+; Load a larger vector but only deinterleave a subset of the elements.
+define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_v16i32(ptr %ptr) {
+; CHECK-LABEL: vpload_factor3_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <16 x i32> @llvm.vp.load.v16i32.p0(ptr %ptr, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, i32 12)
+ %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
+
+; Make sure the mask is propagated.
+define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_mask(ptr %ptr) {
+; CHECK-LABEL: vpload_factor3_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 10
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
+
+; Poison/undef in the shuffle mask shouldn't affect anything.
+define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3_poison_shufflemask(ptr %ptr) {
+; CHECK-LABEL: vpload_factor3_poison_shufflemask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 10
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> <i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 poison, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
+
+define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor4(ptr %ptr) {
+; CHECK-LABEL: vpload_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <16 x i32> @llvm.vp.load.v16i32.p0(ptr %ptr, <16 x i1> splat (i1 true), i32 16)
+ %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+ %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+ %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+ %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
+ ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
+}
+
+; TODO: Add more tests for vp.load/store + (de)interleave intrinsics with fixed vectors.
+define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vpload_factor4_intrinsics(ptr %ptr) {
+; CHECK-LABEL: vpload_factor4_intrinsics:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 8)
+ %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
+ %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
+ %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
+ %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
+ %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
+ %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
+ %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
+ %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
+ %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
+
+ %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
+ %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
+ %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
+ %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
+ ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
+}
+
+define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor5(ptr %ptr) {
+; CHECK-LABEL: vpload_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <20 x i32> @llvm.vp.load.v20i32.p0(ptr %ptr, <20 x i1> splat (i1 true), i32 20)
+ %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
+ %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
+ %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
+ %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
+ %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
+ %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
+ ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
+}
+
+define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor6(ptr %ptr) {
+; CHECK-LABEL: vpload_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <12 x i16> @llvm.vp.load.v12i16.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
+ %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 1, i32 7>
+ %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 2, i32 8>
+ %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 3, i32 9>
+ %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 4, i32 10>
+ %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
+ %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
+ %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
+ %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
+ %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
+ %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
+ %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
+ ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
+}
+
+define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor7(ptr %ptr) {
+; CHECK-LABEL: vpload_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <14 x i16> @llvm.vp.load.v14i16.p0(ptr %ptr, <14 x i1> splat (i1 true), i32 14)
+ %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
+ %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
+ %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
+ %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
+ %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
+ %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
+ %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
+ %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
+ %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
+ %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
+ %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
+ %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
+ %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
+ %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
+ ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
+}
+
+define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vpload_factor8(ptr %ptr) {
+; CHECK-LABEL: vpload_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = tail call <16 x i16> @llvm.vp.load.v16i16.p0(ptr %ptr, <16 x i1> splat (i1 true), i32 16)
+ %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
+ %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
+ %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
+ %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
+ %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
+ %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
+ %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
+ %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
+ %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
+ %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
+ %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
+ %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
+ %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
+ %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
+ %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
+ %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
+ ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
+}
+
; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg
define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) {
; RV32-LABEL: load_factor6_too_big:
@@ -192,8 +427,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI8_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI8_0)
+; RV32-NEXT: lui a7, %hi(.LCPI20_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -278,12 +513,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI8_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_1)
+; RV32-NEXT: lui a1, %hi(.LCPI20_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI8_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI8_3)
+; RV32-NEXT: lui a4, %hi(.LCPI20_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -462,8 +697,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI8_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2)
+; RV32-NEXT: lui a1, %hi(.LCPI20_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -527,16 +762,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI8_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4)
-; RV32-NEXT: lui a2, %hi(.LCPI8_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5)
+; RV32-NEXT: lui a1, %hi(.LCPI20_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4)
+; RV32-NEXT: lui a2, %hi(.LCPI20_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI8_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7)
+; RV32-NEXT: lui a1, %hi(.LCPI20_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -564,14 +799,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI8_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6)
-; RV32-NEXT: lui a2, %hi(.LCPI8_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8)
+; RV32-NEXT: lui a1, %hi(.LCPI20_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6)
+; RV32-NEXT: lui a2, %hi(.LCPI20_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI8_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9)
+; RV32-NEXT: lui a1, %hi(.LCPI20_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -658,8 +893,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI8_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI8_0)
+; RV64-NEXT: lui a3, %hi(.LCPI20_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -847,8 +1082,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI8_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI8_1)
+; RV64-NEXT: lui a2, %hi(.LCPI20_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -882,8 +1117,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI8_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI8_2)
+; RV64-NEXT: lui a2, %hi(.LCPI20_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -967,12 +1202,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI8_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3)
+; RV64-NEXT: lui a1, %hi(.LCPI20_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI8_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4)
+; RV64-NEXT: lui a1, %hi(.LCPI20_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1023,8 +1258,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI8_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5)
+; RV64-NEXT: lui a1, %hi(.LCPI20_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1196,6 +1431,154 @@ define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2
ret void
}
+define void @store_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6) {
+; CHECK-LABEL: store_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg7e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+ store <14 x i16> %interleaved.vec, ptr %ptr
+ ret void
+}
+
+define void @store_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
+; CHECK-LABEL: store_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg8e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s4 = shufflevector <2 x i16> %v6, <2 x i16> %v7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ store <16 x i16> %interleaved.vec, ptr %ptr
+ ret void
+}
+
+define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: vpstore_factor2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg2e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> splat (i1 true), i32 8)
+ ret void
+}
+
+define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: vpstore_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+ tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ ret void
+}
+
+define void @vpstore_factor3_mask(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: vpstore_factor3_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 5
+; CHECK-NEXT: vsseg3e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
+ tail call void @llvm.vp.store.v12i32.p0(<12 x i32> %interleaved.vec, ptr %ptr, <12 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, i32 12)
+ ret void
+}
+
+define void @vpstore_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; CHECK-LABEL: vpstore_factor4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg4e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ tail call void @llvm.vp.store.v16i32.p0(<16 x i32> %interleaved.vec, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
+ ret void
+}
+
+define void @vpstore_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
+; CHECK-LABEL: vpstore_factor5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg5e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19>
+ tail call void @llvm.vp.store.v20i32.p0(<20 x i32> %interleaved.vec, ptr %ptr, <20 x i1> splat (i1 true), i32 20)
+ ret void
+}
+
+define void @vpstore_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
+; CHECK-LABEL: vpstore_factor6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg6e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
+ tail call void @llvm.vp.store.v12i16.p0(<12 x i16> %interleaved.vec, ptr %ptr, <12 x i1> splat (i1 true), i32 12)
+ ret void
+}
+
+define void @vpstore_factor7(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6) {
+; CHECK-LABEL: vpstore_factor7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg7e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s4 = shufflevector <2 x i16> %v6, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef, i32 undef>
+ %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <14 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+ tail call void @llvm.vp.store.v14i16.p0(<14 x i16> %interleaved.vec, ptr %ptr, <14 x i1> splat (i1 true), i32 14)
+ ret void
+}
+
+define void @vpstore_factor8(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5, <2 x i16> %v6, <2 x i16> %v7) {
+; CHECK-LABEL: vpstore_factor8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg8e16.v v8, (a0)
+; CHECK-NEXT: ret
+ %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s2 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s3 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s4 = shufflevector <2 x i16> %v6, <2 x i16> %v7, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %s5 = shufflevector <4 x i16> %s2, <4 x i16> %s4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i16> %s3, <8 x i16> %s5, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ tail call void @llvm.vp.store.v16i16.p0(<16 x i16> %interleaved.vec, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
+ ret void
+}
define <4 x i32> @load_factor2_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor2_one_active:
@@ -1368,3 +1751,157 @@ define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
store <16 x i32> %v0, ptr %ptr
ret void
}
+
+; Negative tests
+
+define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
+; RV32-LABEL: invalid_vp_mask:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 73
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: vmv.v.i v10, 8
+; RV32-NEXT: addi a1, a1, -43
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: li a1, 146
+; RV32-NEXT: vsetivli zero, 12, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v12, (a0), v0.t
+; RV32-NEXT: li a0, 36
+; RV32-NEXT: vmv.s.x v20, a1
+; RV32-NEXT: lui a1, %hi(.LCPI49_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI49_0)
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vle16.v v21, (a1)
+; RV32-NEXT: vcompress.vm v8, v12, v11
+; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v12, 8
+; RV32-NEXT: vmv1r.v v0, v10
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; RV32-NEXT: vrgather.vi v8, v16, 1, v0.t
+; RV32-NEXT: vcompress.vm v14, v12, v20
+; RV32-NEXT: vrgather.vi v14, v16, 2, v0.t
+; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmerge.vvm v12, v16, v12, v0
+; RV32-NEXT: vrgatherei16.vv v10, v12, v21
+; RV32-NEXT: vmv1r.v v9, v14
+; RV32-NEXT: ret
+;
+; RV64-LABEL: invalid_vp_mask:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 73
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vmv.s.x v11, a1
+; RV64-NEXT: li a1, 146
+; RV64-NEXT: vmv.s.x v20, a1
+; RV64-NEXT: lui a1, 1
+; RV64-NEXT: vmv.v.i v10, 8
+; RV64-NEXT: addi a1, a1, -43
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: li a1, 36
+; RV64-NEXT: vsetivli zero, 12, e32, m4, ta, ma
+; RV64-NEXT: vle32.v v12, (a0), v0.t
+; RV64-NEXT: li a0, 3
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: addi a0, a0, 5
+; RV64-NEXT: slli a0, a0, 16
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v8, v12, v11
+; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v12, 8
+; RV64-NEXT: vmv1r.v v0, v10
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; RV64-NEXT: vrgather.vi v8, v16, 1, v0.t
+; RV64-NEXT: vcompress.vm v14, v12, v20
+; RV64-NEXT: vrgather.vi v14, v16, 2, v0.t
+; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: vmerge.vvm v12, v16, v12, v0
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vrgatherei16.vv v10, v12, v9
+; RV64-NEXT: vmv1r.v v9, v14
+; RV64-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i32 12)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
+
+define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
+; RV32-LABEL: invalid_vp_evl:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 10, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v12, (a0)
+; RV32-NEXT: li a0, 73
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT: vmv.v.i v0, 8
+; RV32-NEXT: vmv.s.x v10, a0
+; RV32-NEXT: li a0, 146
+; RV32-NEXT: vmv.s.x v11, a0
+; RV32-NEXT: lui a0, %hi(.LCPI50_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI50_0)
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vle16.v v20, (a0)
+; RV32-NEXT: li a0, 36
+; RV32-NEXT: vcompress.vm v8, v12, v10
+; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; RV32-NEXT: vslidedown.vi v16, v12, 8
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; RV32-NEXT: vrgather.vi v8, v16, 1, v0.t
+; RV32-NEXT: vcompress.vm v14, v12, v11
+; RV32-NEXT: vrgather.vi v14, v16, 2, v0.t
+; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmerge.vvm v12, v16, v12, v0
+; RV32-NEXT: vrgatherei16.vv v10, v12, v20
+; RV32-NEXT: vmv1r.v v9, v14
+; RV32-NEXT: ret
+;
+; RV64-LABEL: invalid_vp_evl:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 10, e32, m4, ta, ma
+; RV64-NEXT: vle32.v v12, (a0)
+; RV64-NEXT: li a0, 73
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 8
+; RV64-NEXT: vmv.s.x v10, a0
+; RV64-NEXT: li a0, 146
+; RV64-NEXT: vmv.s.x v11, a0
+; RV64-NEXT: li a0, 36
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vcompress.vm v8, v12, v10
+; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
+; RV64-NEXT: vslidedown.vi v16, v12, 8
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; RV64-NEXT: vrgather.vi v8, v16, 1, v0.t
+; RV64-NEXT: vcompress.vm v14, v12, v11
+; RV64-NEXT: vrgather.vi v14, v16, 2, v0.t
+; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: li a0, 3
+; RV64-NEXT: slli a0, a0, 32
+; RV64-NEXT: addi a0, a0, 5
+; RV64-NEXT: slli a0, a0, 16
+; RV64-NEXT: addi a0, a0, 2
+; RV64-NEXT: vmerge.vvm v12, v16, v12, v0
+; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v9, a0
+; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vrgatherei16.vv v10, v12, v9
+; RV64-NEXT: vmv1r.v v9, v14
+; RV64-NEXT: ret
+ %interleaved.vec = tail call <12 x i32> @llvm.vp.load.v12i32.p0(ptr %ptr, <12 x i1> splat (i1 true), i32 10)
+ %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+ %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
+ ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index d6e1af59e6341..d0f35aa8b85e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -616,59 +616,6 @@ define void @not_balanced_store_tree(<vscale x 1 x i32> %v0, <vscale x 2 x i32>
ret void
}
-; We only support scalable vectors for now.
-define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @not_scalable_vectors(ptr %ptr, i32 %evl) {
-; RV32-LABEL: not_scalable_vectors:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vnsrl.wx v12, v8, a0
-; RV32-NEXT: vnsrl.wi v11, v8, 0
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vnsrl.wx v10, v11, a0
-; RV32-NEXT: vnsrl.wi v8, v11, 0
-; RV32-NEXT: vnsrl.wx v11, v12, a0
-; RV32-NEXT: vnsrl.wi v9, v12, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: not_scalable_vectors:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a1, a1, 34
-; RV64-NEXT: srli a1, a1, 32
-; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vnsrl.wx v12, v8, a0
-; RV64-NEXT: vnsrl.wi v11, v8, 0
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vnsrl.wx v10, v11, a0
-; RV64-NEXT: vnsrl.wi v8, v11, 0
-; RV64-NEXT: vnsrl.wx v11, v12, a0
-; RV64-NEXT: vnsrl.wi v9, v12, 0
-; RV64-NEXT: ret
- %rvl = mul i32 %evl, 4
- %wide.masked.load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %rvl)
- %d0 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %wide.masked.load)
- %d0.0 = extractvalue { <4 x i32>, <4 x i32> } %d0, 0
- %d0.1 = extractvalue { <4 x i32>, <4 x i32> } %d0, 1
- %d1 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.0)
- %t0 = extractvalue { <2 x i32>, <2 x i32> } %d1, 0
- %t2 = extractvalue { <2 x i32>, <2 x i32> } %d1, 1
- %d2 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d0.1)
- %t1 = extractvalue { <2 x i32>, <2 x i32> } %d2, 0
- %t3 = extractvalue { <2 x i32>, <2 x i32> } %d2, 1
-
- %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
- %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
- %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
- %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
- ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3
-}
-
define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1> %mask0, <vscale x 2 x i1> %mask1, ptr %ptr, i32 %evl) {
; RV32-LABEL: not_same_mask:
; RV32: # %bb.0:
>From 0d0ef58c8facb0f2e5c2f4615b0d25f19e5abe01 Mon Sep 17 00:00:00 2001
From: Jason Molenda <jmolenda at apple.com>
Date: Wed, 7 May 2025 15:53:30 -0700
Subject: [PATCH 061/115] [lldb][Darwin] Note why this test is xfail'ed on
darwin - due to there not being any eh_frame instructions for _sigtramp from
the system libraries.
---
lldb/test/Shell/Unwind/signal-in-leaf-function-aarch64.test | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lldb/test/Shell/Unwind/signal-in-leaf-function-aarch64.test b/lldb/test/Shell/Unwind/signal-in-leaf-function-aarch64.test
index 2ac2d4a750782..050c41c73f9c2 100644
--- a/lldb/test/Shell/Unwind/signal-in-leaf-function-aarch64.test
+++ b/lldb/test/Shell/Unwind/signal-in-leaf-function-aarch64.test
@@ -1,6 +1,8 @@
# REQUIRES: target-aarch64 && native
# UNSUPPORTED: system-windows
# llvm.org/pr91610, rdar://128031075
+
+# Darwin _sigtramp doesn't have eh_frame instructions on AArch64
# XFAIL: system-darwin
>From b756c82bfacb2822cd516c32ae3c406e71448c0a Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziqing at udel.edu>
Date: Wed, 7 May 2025 15:08:44 -0700
Subject: [PATCH 062/115] Re-land "[analyzer] Make it a noop when initializing
a field of empty record" (#138951)
The original commit assumed that
`CXXConstructExpr->getType()->getAsRecordDecl()` is always a
`CXXRecordDecl`, but this is not true for ObjC programs.
This relanding changes
`cast<CXXRecordDecl>(CXXConstructExpr->getType()->getAsRecordDecl())`
to
`dyn_cast_or_null<CXXRecordDecl>(CXXConstructExpr->getType()->getAsRecordDecl())`
This reverts commit 9048c2d4f239cb47fed17cb150e2bbf3934454c2.
rdar://146753089
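For context, here is a minimal self-contained sketch of the casting behaviour
this reland relies on. The two-class hierarchy and the helper below are
illustrative stand-ins, not clang's AST classes: cast<> asserts when its
argument is null or of the wrong dynamic type, while dyn_cast_or_null<>
returns nullptr, which is what lets the new code skip the zero-initialization
only for genuinely empty C++ records.

  // Illustrative sketch (assumed names), not the analyzer's real types.
  #include "llvm/Support/Casting.h"

  struct RecordDecl {
    enum Kind { RK_Plain, RK_CXX };
    Kind K;
    RecordDecl(Kind K) : K(K) {}
    Kind getKind() const { return K; }
  };

  struct CXXRecordDecl : RecordDecl {
    CXXRecordDecl() : RecordDecl(RK_CXX) {}
    bool isEmpty() const { return true; }
    // Hook used by llvm::isa/dyn_cast to recognise this subclass.
    static bool classof(const RecordDecl *D) { return D->getKind() == RK_CXX; }
  };

  bool shouldSkipZeroInit(const RecordDecl *D) {
    // cast<CXXRecordDecl>(D) would assert (or crash) when D is null or not a
    // C++ record, which is what happened for ObjC code; dyn_cast_or_null<>
    // returns nullptr instead, so zero-init is skipped only for empty
    // C++ records.
    const auto *CXXRD = llvm::dyn_cast_or_null<CXXRecordDecl>(D);
    return CXXRD && CXXRD->isEmpty();
  }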
---
.../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 8 ++-
clang/test/Analysis/issue-137252.cpp | 50 +++++++++++++++++++
2 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Analysis/issue-137252.cpp
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 92ce3fa2225c8..e07e24faa3490 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/AST/ASTContext.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/ParentMap.h"
@@ -23,6 +24,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/Casting.h"
#include <optional>
using namespace clang;
@@ -715,7 +717,11 @@ void ExprEngine::handleConstructor(const Expr *E,
// actually make things worse. Placement new makes this tricky as well,
// since it's then possible to be initializing one part of a multi-
// dimensional array.
- State = State->bindDefaultZero(Target, LCtx);
+ const CXXRecordDecl *TargetHeldRecord =
+ dyn_cast_or_null<CXXRecordDecl>(CE->getType()->getAsRecordDecl());
+
+ if (!TargetHeldRecord || !TargetHeldRecord->isEmpty())
+ State = State->bindDefaultZero(Target, LCtx);
}
Bldr.generateNode(CE, N, State, /*tag=*/nullptr,
diff --git a/clang/test/Analysis/issue-137252.cpp b/clang/test/Analysis/issue-137252.cpp
new file mode 100644
index 0000000000000..6ca3e20ccbbca
--- /dev/null
+++ b/clang/test/Analysis/issue-137252.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=cplusplus -verify %s -DEMPTY_CLASS
+// UNSUPPORTED: system-windows
+// expected-no-diagnostics
+
+// This test reproduces the issue that previously the static analyzer
+// initialized a [[no_unique_address]] empty field to zero,
+// overwriting a non-empty field with the same offset.
+
+namespace std {
+#ifdef EMPTY_CLASS
+
+ struct default_delete {};
+ template <class _Tp, class _Dp = default_delete >
+#else
+ // Class with methods and static members is still empty:
+ template <typename T>
+ class default_delete {
+ T dump();
+ static T x;
+ };
+ template <class _Tp, class _Dp = default_delete<_Tp> >
+#endif
+ class unique_ptr {
+ [[no_unique_address]] _Tp * __ptr_;
+ [[no_unique_address]] _Dp __deleter_;
+
+ public:
+ explicit unique_ptr(_Tp* __p) noexcept
+ : __ptr_(__p),
+ __deleter_() {}
+
+ ~unique_ptr() {
+ delete __ptr_;
+ }
+ };
+}
+
+struct X {};
+
+int main()
+{
+ // Previously a leak was falsely reported here. It was because the
+ // Static Analyzer engine simulated the initialization of
+ // `__deleter_` incorrectly. The engine assigned zero to
+ // `__deleter_`, an empty record sharing its offset with `__ptr_`.
+ // The assignment overwrote `__ptr_`.
+ std::unique_ptr<X> a(new X());
+ return 0;
+}
>From 6a28d8c24a07cb562ad53335ee9a5f07328b20e7 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 7 May 2025 15:57:01 -0700
Subject: [PATCH 063/115] [sanitizer] Add plumbing for
-fsanitize-annotate-debug-info and partly replace '-mllvm
-array-bounds-pseudofn' (#138577)
@fmayer introduced '-mllvm -array-bounds-pseudofn'
(https://github.com/llvm/llvm-project/pull/128977/) to make it easier to
see why crashes occurred, and to estimate with a profiler the cycles
spent on these array-bounds checks. This functionality could be usefully
generalized to other checks in future work.
This patch adds the plumbing for -fsanitize-annotate-debug-info, and
connects it to the existing array-bounds-pseudo-fn functionality, i.e.,
-fsanitize-annotate-debug-info=array-bounds can be used as a replacement
for '-mllvm -array-bounds-pseudofn', though we do not yet delete the
latter.
Note: we replaced '-mllvm -array-bounds-pseudofn' in
clang/test/CodeGen/bounds-checking-debuginfo.c, because adding test
cases would modify the line numbers in the test assertions, and
therefore obscure that the test output is the same between '-mllvm
-array-bounds-pseudofn' and -fsanitize-annotate-debug-info=array-bounds.
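As a rough usage sketch (the file name and build line below are illustrative;
the only new flag is -fsanitize-annotate-debug-info): compiling a small array
access with the new driver flag should give the emitted bounds check a
synthetic inlined-at scope named __ubsan_check_array_bounds, which is what
lets profilers and crash reports attribute the check's cost separately from
the user's source line.

  // annotate-example.cpp (name is illustrative). Hypothetical build line:
  //   clang -O1 -g -fsanitize=array-bounds \
  //         -fsanitize-annotate-debug-info=array-bounds -c annotate-example.cpp
  // The bounds check emitted for the access below should carry the
  // "__ubsan_check_array_bounds" synthetic inline scope in its debug
  // location, mirroring what clang/test/CodeGen/bounds-checking-debuginfo.c
  // verifies at the -cc1 level.
  int table[4];

  int lookup(int i) {
    return table[i]; // instrumented with an annotated array-bounds check
  }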
---
clang/include/clang/Basic/CodeGenOptions.h | 4 ++
clang/include/clang/Driver/Options.td | 25 ++++++++
clang/include/clang/Driver/SanitizerArgs.h | 1 +
clang/lib/CodeGen/CGExpr.cpp | 10 +++-
clang/lib/Driver/SanitizerArgs.cpp | 19 +++++-
clang/lib/Frontend/CompilerInvocation.cpp | 9 +++
.../test/CodeGen/bounds-checking-debuginfo.c | 4 +-
clang/test/Driver/fsanitize.c | 58 +++++++++++++++++++
8 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h
index e39a73bdb13ac..e3fa6a55e7608 100644
--- a/clang/include/clang/Basic/CodeGenOptions.h
+++ b/clang/include/clang/Basic/CodeGenOptions.h
@@ -399,6 +399,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// (0.0 [default] to skip none, 1.0 to skip all).
SanitizerMaskCutoffs SanitizeSkipHotCutoffs;
+ /// Set of sanitizer checks, for which the instrumentation will be annotated
+ /// with extra debug info.
+ SanitizerSet SanitizeAnnotateDebugInfo;
+
/// List of backend command-line options for -fembed-bitcode.
std::vector<uint8_t> CmdArgs;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 30ea75bb108d5..351e1ad4e1b03 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2540,6 +2540,31 @@ def fno_sanitize_merge_handlers : Flag<["-"], "fno-sanitize-merge">, Group<f_cla
Alias<fno_sanitize_merge_handlers_EQ>, AliasArgs<["all"]>,
Visibility<[ClangOption, CLOption]>,
HelpText<"Do not allow compiler to merge handlers for any sanitizers">;
+def fsanitize_annotate_debug_info_EQ
+ : CommaJoined<["-"], "fsanitize-annotate-debug-info=">,
+ Group<f_clang_Group>,
+ HelpText<"Annotate sanitizer instrumentation with extra debug info for "
+ "the specified sanitizers, if supported">;
+def fno_sanitize_annotate_debug_info_EQ
+ : CommaJoined<["-"], "fno-sanitize-annotate-debug-info=">,
+ Group<f_clang_Group>,
+ HelpText<"Do not allow compiler to annotate sanitizer instrumentation "
+ "with extra debug info for the specified sanitizers">;
+def fsanitize_annotate_debug_info
+ : Flag<["-"], "fsanitize-annotate-debug-info">,
+ Group<f_clang_Group>,
+ Alias<fsanitize_annotate_debug_info_EQ>,
+ AliasArgs<["all"]>,
+ HelpText<"Allow compiler to annotate sanitizer instrumentation with "
+ "extra debug info for all sanitizers, where supported">;
+def fno_sanitize_annotate_debug_info
+ : Flag<["-"], "fno-sanitize-annotate-debug-info">,
+ Group<f_clang_Group>,
+ Alias<fno_sanitize_annotate_debug_info_EQ>,
+ AliasArgs<["all"]>,
+ Visibility<[ClangOption, CLOption]>,
+ HelpText<"Do not allow compiler to annotate sanitizer instrumentation "
+ "with extra debug info for any sanitizers">;
def fsanitize_undefined_trap_on_error
: Flag<["-"], "fsanitize-undefined-trap-on-error">, Group<f_clang_Group>,
Alias<fsanitize_trap_EQ>, AliasArgs<["undefined"]>;
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
index 513339060f2b2..2b72268c8606c 100644
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -27,6 +27,7 @@ class SanitizerArgs {
SanitizerSet TrapSanitizers;
SanitizerSet MergeHandlers;
SanitizerMaskCutoffs SkipHotCutoffs;
+ SanitizerSet AnnotateDebugInfo;
std::vector<std::string> UserIgnorelistFiles;
std::vector<std::string> SystemIgnorelistFiles;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 2e01adc51fdf0..6f5ead78f2b23 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1228,7 +1228,11 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound,
SanitizerScope SanScope(this);
llvm::DILocation *CheckDI = Builder.getCurrentDebugLocation();
- if (ClArrayBoundsPseudoFn && CheckDI) {
+ auto CheckKind = SanitizerKind::SO_ArrayBounds;
+ // TODO: deprecate ClArrayBoundsPseudoFn
+ if ((ClArrayBoundsPseudoFn ||
+ CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo.has(CheckKind)) &&
+ CheckDI) {
CheckDI = getDebugInfo()->CreateSyntheticInlineAt(
Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds");
}
@@ -1245,8 +1249,8 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound,
};
llvm::Value *Check = Accessed ? Builder.CreateICmpULT(IndexVal, BoundVal)
: Builder.CreateICmpULE(IndexVal, BoundVal);
- EmitCheck(std::make_pair(Check, SanitizerKind::SO_ArrayBounds),
- SanitizerHandler::OutOfBounds, StaticData, Index);
+ EmitCheck(std::make_pair(Check, CheckKind), SanitizerHandler::OutOfBounds,
+ StaticData, Index);
}
CodeGenFunction::ComplexPairTy CodeGenFunction::
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index b29fde92d0722..6ff45d145e81c 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -76,6 +76,7 @@ static const SanitizerMask MergeDefault =
SanitizerKind::Undefined | SanitizerKind::Vptr;
static const SanitizerMask TrappingDefault =
SanitizerKind::CFI | SanitizerKind::LocalBounds;
+static const SanitizerMask AnnotateDebugInfoDefault;
static const SanitizerMask CFIClasses =
SanitizerKind::CFIVCall | SanitizerKind::CFINVCall |
SanitizerKind::CFIMFCall | SanitizerKind::CFIDerivedCast |
@@ -738,6 +739,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
// Parse -fno-sanitize-top-hot flags
SkipHotCutoffs = parseSanitizeSkipHotCutoffArgs(D, Args, DiagnoseErrors);
+ // Parse -f(no-)?sanitize-annotate-debug-info flags
+ SanitizerMask AnnotateDebugInfoKinds =
+ parseSanitizeArgs(D, Args, DiagnoseErrors, AnnotateDebugInfoDefault, {},
+ {}, options::OPT_fsanitize_annotate_debug_info_EQ,
+ options::OPT_fno_sanitize_annotate_debug_info_EQ);
+ AnnotateDebugInfoKinds &= Kinds;
+
// Setup ignorelist files.
// Add default ignorelist from resource directory for activated sanitizers,
// and validate special case lists format.
@@ -1168,6 +1176,8 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
MergeHandlers.Mask |= MergeKinds;
+ AnnotateDebugInfo.Mask |= AnnotateDebugInfoKinds;
+
// Zero out SkipHotCutoffs for unused sanitizers
SkipHotCutoffs.clear(~Sanitizers.Mask);
}
@@ -1351,6 +1361,10 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
CmdArgs.push_back(
Args.MakeArgString("-fsanitize-skip-hot-cutoff=" + SkipHotCutoffsStr));
+ if (!AnnotateDebugInfo.empty())
+ CmdArgs.push_back(Args.MakeArgString("-fsanitize-annotate-debug-info=" +
+ toString(AnnotateDebugInfo)));
+
addSpecialCaseListOpt(Args, CmdArgs,
"-fsanitize-ignorelist=", UserIgnorelistFiles);
addSpecialCaseListOpt(Args, CmdArgs,
@@ -1534,7 +1548,10 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
A->getOption().matches(options::OPT_fsanitize_trap_EQ) ||
A->getOption().matches(options::OPT_fno_sanitize_trap_EQ) ||
A->getOption().matches(options::OPT_fsanitize_merge_handlers_EQ) ||
- A->getOption().matches(options::OPT_fno_sanitize_merge_handlers_EQ)) &&
+ A->getOption().matches(options::OPT_fno_sanitize_merge_handlers_EQ) ||
+ A->getOption().matches(options::OPT_fsanitize_annotate_debug_info_EQ) ||
+ A->getOption().matches(
+ options::OPT_fno_sanitize_annotate_debug_info_EQ)) &&
"Invalid argument in parseArgValues!");
SanitizerMask Kinds;
for (int i = 0, n = A->getNumValues(); i != n; ++i) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index c7d11e6027ccf..a0b8bbf9d827f 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1838,6 +1838,10 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
for (std::string Sanitizer : Values)
GenerateArg(Consumer, OPT_fsanitize_skip_hot_cutoff_EQ, Sanitizer);
+ for (StringRef Sanitizer :
+ serializeSanitizerKinds(Opts.SanitizeAnnotateDebugInfo))
+ GenerateArg(Consumer, OPT_fsanitize_annotate_debug_info_EQ, Sanitizer);
+
if (!Opts.EmitVersionIdentMetadata)
GenerateArg(Consumer, OPT_Qn);
@@ -2332,6 +2336,11 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
"-fsanitize-skip-hot-cutoff=",
Args.getAllArgValues(OPT_fsanitize_skip_hot_cutoff_EQ), Diags);
+ parseSanitizerKinds(
+ "-fsanitize-annotate-debug-info=",
+ Args.getAllArgValues(OPT_fsanitize_annotate_debug_info_EQ), Diags,
+ Opts.SanitizeAnnotateDebugInfo);
+
Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true);
if (!LangOpts->CUDAIsDevice)
diff --git a/clang/test/CodeGen/bounds-checking-debuginfo.c b/clang/test/CodeGen/bounds-checking-debuginfo.c
index 4f5ba2b76eeeb..74c06665dfe02 100644
--- a/clang/test/CodeGen/bounds-checking-debuginfo.c
+++ b/clang/test/CodeGen/bounds-checking-debuginfo.c
@@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -mllvm -array-bounds-pseudofn -emit-llvm -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -fsanitize=array-bounds -fsanitize-trap=array-bounds -triple x86_64 -debug-info-kind=limited %s -o - | FileCheck --check-prefix=CHECK-TRAP %s
-// RUN: %clang_cc1 -mllvm -array-bounds-pseudofn -emit-llvm -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -fsanitize=array-bounds -triple x86_64 -debug-info-kind=limited %s -o - | FileCheck --check-prefix=CHECK-NOTRAP %s
+// RUN: %clang_cc1 -emit-llvm -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -fsanitize=array-bounds -fsanitize-trap=array-bounds -fsanitize-annotate-debug-info=array-bounds -triple x86_64 -debug-info-kind=limited %s -o - | FileCheck --check-prefix=CHECK-TRAP %s
+// RUN: %clang_cc1 -emit-llvm -fdebug-prefix-map=%S/= -fno-ident -fdebug-compilation-dir=%S -fsanitize=array-bounds -fsanitize-annotate-debug-info=array-bounds -triple x86_64 -debug-info-kind=limited %s -o - | FileCheck --check-prefix=CHECK-NOTRAP %s
int f();
void d(double*);
diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c
index eb72140fb1315..24d64c94c0956 100644
--- a/clang/test/Driver/fsanitize.c
+++ b/clang/test/Driver/fsanitize.c
@@ -1,3 +1,5 @@
+// * Test -fsanitize-trap *
+
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-trap=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP2
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-TRAP
@@ -9,6 +11,9 @@
// CHECK-UNDEFINED-TRAP: "-fsanitize-trap=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
// CHECK-UNDEFINED-TRAP2: "-fsanitize-trap=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,unreachable,vla-bound"
+
+// * Test -fsanitize-merge *
+
// The trailing -fsanitize-merge takes precedence
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-MERGE
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-merge %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-MERGE
@@ -62,6 +67,59 @@
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-merge=signed-integer-overflow -fno-sanitize-merge=undefined -fsanitize-merge=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-MERGE5
// CHECK-UNDEFINED-MERGE5: "-fsanitize-merge=alignment,null"
+
+// * Test -fsanitize-annotate-debug-info *
+
+// The trailing -fsanitize-annotate-debug-info takes precedence
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow -fsanitize-annotate-debug-info %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=bool -fsanitize-annotate-debug-info=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info=bool %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO
+// CHECK-UNDEFINED-PSEUDO: "-fsanitize-annotate-debug-info=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
+
+// The trailing arguments (-fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=signed-integer-overflow) take precedence
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO2
+// CHECK-UNDEFINED-PSEUDO2: "-fsanitize-annotate-debug-info=alignment,array-bounds,bool,builtin,enum,float-cast-overflow,function,integer-divide-by-zero,nonnull-attribute,null,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,unreachable,vla-bound"
+
+// The trailing -fno-sanitize-annotate-debug-info takes precedence
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=undefined %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=bool %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=bool %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=undefined %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=undefined %s -### 2>&1 | not FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO3
+// CHECK-UNDEFINED-PSEUDO3: "-fsanitize-annotate-debug-info"
+
+// The trailing arguments (-fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=alignment,null) take precedence
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=signed-integer-overflow -fsanitize-annotate-debug-info=undefined -fno-sanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO4
+// CHECK-UNDEFINED-PSEUDO4: "-fsanitize-annotate-debug-info=array-bounds,bool,builtin,enum,float-cast-overflow,function,integer-divide-by-zero,nonnull-attribute,pointer-overflow,return,returns-nonnull-attribute,shift-base,shift-exponent,signed-integer-overflow,unreachable,vla-bound"
+
+// The trailing arguments (-fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=alignment,null) take precedence
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fno-sanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO5
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO5
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info -fno-sanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO5
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=signed-integer-overflow -fno-sanitize-annotate-debug-info -fsanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO5
+// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined -fsanitize-trap=undefined -fsanitize-annotate-debug-info=signed-integer-overflow -fno-sanitize-annotate-debug-info=undefined -fsanitize-annotate-debug-info=alignment,null %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-PSEUDO5
+// CHECK-UNDEFINED-PSEUDO5: "-fsanitize-annotate-debug-info=alignment,null"
+
+
// RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED
// CHECK-UNDEFINED: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|function|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute),?){18}"}}
>From 7313c3b1f1454b408d9f845a1c6aa40c5b36cae9 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 7 May 2025 16:16:16 -0700
Subject: [PATCH 064/115] [mlir] Use llvm::bit_width (NFC) (#138648)
Note that bit_width is implemented as:
std::numeric_limits<T>::digits - llvm::countl_zero(Value);
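A tiny standalone check of that identity (values chosen arbitrarily; both
helpers live in llvm/ADT/bit.h):

  #include "llvm/ADT/bit.h"
  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Storage = 0xB0; // highest set bit is bit 7
    unsigned Old = 64 - llvm::countl_zero(Storage); // previous spelling in max()
    unsigned New = llvm::bit_width(Storage);        // new spelling
    assert(Old == New && New == 8);
    (void)Old;
    (void)New;
    return 0;
  }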
---
mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index fca2629d72efc..d0a3f01afe871 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -122,7 +122,7 @@ class I64BitSet {
unsigned m = llvm::countr_zero(storage);
return m == 64 ? -1 : m;
}
- unsigned max() const { return 64 - llvm::countl_zero(storage); }
+ unsigned max() const { return llvm::bit_width(storage); }
unsigned count() const { return llvm::popcount(storage); }
bool empty() const { return storage == 0; }
};
>From aeeb9a3c09f40f42a1e8e5e3c8dbde3b260744bd Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Tue, 6 May 2025 15:58:44 -0700
Subject: [PATCH 065/115] [lldb-dap] Change the launch sequence (#138219)
(reland)
This PR changes how we treat the launch sequence in lldb-dap.
- Send the initialized event after we finish handling the initialize
request, rather than after we finish attaching or launching.
- Delay handling the launch and attach requests until we have handled
the configurationDone request. The latter is now largely a NO-OP and
only exists to signal lldb-dap that it can handle the launch and
attach requests.
- Delay handling the initial threads requests until we have handled
the launch or attach request.
- Make all attaching and launching synchronous, including when we have
attach or launch commands. This removes the need to synchronize
between the request and event threads.
Background:
https://discourse.llvm.org/t/reliability-of-the-lldb-dap-tests/86125
---
.../test/tools/lldb-dap/dap_server.py | 65 +++++-----
.../test/tools/lldb-dap/lldbdap_testcase.py | 55 +++++++++
.../tools/lldb-dap/attach/TestDAP_attach.py | 2 +
.../attach/TestDAP_attachByPortNum.py | 8 +-
.../TestDAP_breakpointEvents.py | 61 +++-------
.../completions/TestDAP_completions.py | 26 ++--
.../tools/lldb-dap/console/TestDAP_console.py | 11 +-
.../console/TestDAP_redirection_to_console.py | 4 +-
.../lldb-dap/disconnect/TestDAP_disconnect.py | 6 +-
.../lldb-dap/evaluate/TestDAP_evaluate.py | 5 +-
.../lldb-dap/exception/TestDAP_exception.py | 3 +-
.../tools/lldb-dap/launch/TestDAP_launch.py | 7 +-
.../lldb-dap/progress/TestDAP_Progress.py | 2 +-
.../repl-mode/TestDAP_repl_mode_detection.py | 2 +-
.../tools/lldb-dap/restart/TestDAP_restart.py | 1 -
.../restart/TestDAP_restart_runInTerminal.py | 1 -
.../lldb-dap/send-event/TestDAP_sendEvent.py | 7 +-
.../lldb-dap/stackTrace/TestDAP_stackTrace.py | 2 +-
.../TestDAP_stackTraceDisassemblyDisplay.py | 2 +-
.../startDebugging/TestDAP_startDebugging.py | 3 +-
.../lldb-dap/stop-hooks/TestDAP_stop_hooks.py | 2 +-
.../children/TestDAP_variables_children.py | 4 +-
lldb/tools/lldb-dap/DAP.cpp | 39 ++++--
lldb/tools/lldb-dap/DAP.h | 8 +-
lldb/tools/lldb-dap/EventHelper.cpp | 2 +-
.../lldb-dap/Handler/AttachRequestHandler.cpp | 115 ++++++++++--------
.../ConfigurationDoneRequestHandler.cpp | 14 +--
.../Handler/InitializeRequestHandler.cpp | 44 +++----
.../lldb-dap/Handler/LaunchRequestHandler.cpp | 7 +-
.../tools/lldb-dap/Handler/RequestHandler.cpp | 67 ++++++----
lldb/tools/lldb-dap/Handler/RequestHandler.h | 1 +
31 files changed, 332 insertions(+), 244 deletions(-)
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 6d9ab770684f1..e10342b72f4f0 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -132,7 +132,6 @@ def __init__(self, recv, send, init_commands, log_file=None):
self.exit_status = None
self.initialize_body = None
self.thread_stop_reasons = {}
- self.breakpoint_events = []
self.progress_events = []
self.reverse_requests = []
self.module_events = []
@@ -244,13 +243,6 @@ def handle_recv_packet(self, packet):
self._process_stopped()
tid = body["threadId"]
self.thread_stop_reasons[tid] = body
- elif event == "breakpoint":
- # Breakpoint events come in when a breakpoint has locations
- # added or removed. Keep track of them so we can look for them
- # in tests.
- self.breakpoint_events.append(packet)
- # no need to add 'breakpoint' event packets to our packets list
- return keepGoing
elif event.startswith("progress"):
# Progress events come in as 'progressStart', 'progressUpdate',
# and 'progressEnd' events. Keep these around in case test
@@ -412,6 +404,15 @@ def wait_for_stopped(self, timeout=None):
self.threads = []
return stopped_events
+ def wait_for_breakpoint_events(self, timeout=None):
+ breakpoint_events = []
+ while True:
+ event = self.wait_for_event("breakpoint", timeout=timeout)
+ if not event:
+ break
+ breakpoint_events.append(event)
+ return breakpoint_events
+
def wait_for_exited(self):
event_dict = self.wait_for_event("exited")
if event_dict is None:
@@ -591,6 +592,7 @@ def request_attach(
attachCommands=None,
terminateCommands=None,
coreFile=None,
+ stopOnAttach=True,
postRunCommands=None,
sourceMap=None,
gdbRemotePort=None,
@@ -620,6 +622,8 @@ def request_attach(
args_dict["attachCommands"] = attachCommands
if coreFile:
args_dict["coreFile"] = coreFile
+ if stopOnAttach:
+ args_dict["stopOnEntry"] = stopOnAttach
if postRunCommands:
args_dict["postRunCommands"] = postRunCommands
if sourceMap:
@@ -632,7 +636,7 @@ def request_attach(
response = self.send_recv(command_dict)
if response["success"]:
- self.wait_for_events(["process", "initialized"])
+ self.wait_for_event("process")
return response
def request_breakpointLocations(
@@ -666,10 +670,6 @@ def request_configurationDone(self):
response = self.send_recv(command_dict)
if response:
self.configuration_done_sent = True
- # Client requests the baseline of currently existing threads after
- # a successful launch or attach.
- # Kick off the threads request that follows
- self.request_threads()
return response
def _process_stopped(self):
@@ -887,7 +887,7 @@ def request_launch(
response = self.send_recv(command_dict)
if response["success"]:
- self.wait_for_events(["process", "initialized"])
+ self.wait_for_event("process")
return response
def request_next(self, threadId, granularity="statement"):
@@ -1325,6 +1325,26 @@ def attach_options_specified(options):
def run_vscode(dbg, args, options):
dbg.request_initialize(options.sourceInitFile)
+
+ if options.sourceBreakpoints:
+ source_to_lines = {}
+ for file_line in options.sourceBreakpoints:
+ (path, line) = file_line.split(":")
+ if len(path) == 0 or len(line) == 0:
+ print('error: invalid source with line "%s"' % (file_line))
+
+ else:
+ if path in source_to_lines:
+ source_to_lines[path].append(int(line))
+ else:
+ source_to_lines[path] = [int(line)]
+ for source in source_to_lines:
+ dbg.request_setBreakpoints(source, source_to_lines[source])
+ if options.funcBreakpoints:
+ dbg.request_setFunctionBreakpoints(options.funcBreakpoints)
+
+ dbg.request_configurationDone()
+
if attach_options_specified(options):
response = dbg.request_attach(
program=options.program,
@@ -1353,23 +1373,6 @@ def run_vscode(dbg, args, options):
)
if response["success"]:
- if options.sourceBreakpoints:
- source_to_lines = {}
- for file_line in options.sourceBreakpoints:
- (path, line) = file_line.split(":")
- if len(path) == 0 or len(line) == 0:
- print('error: invalid source with line "%s"' % (file_line))
-
- else:
- if path in source_to_lines:
- source_to_lines[path].append(int(line))
- else:
- source_to_lines[path] = [int(line)]
- for source in source_to_lines:
- dbg.request_setBreakpoints(source, source_to_lines[source])
- if options.funcBreakpoints:
- dbg.request_setFunctionBreakpoints(options.funcBreakpoints)
- dbg.request_configurationDone()
dbg.wait_for_stopped()
else:
if "message" in response:
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index 2c14bb35162b5..c5a7eb76a58c7 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -340,6 +340,7 @@ def attach(
exitCommands=None,
attachCommands=None,
coreFile=None,
+ stopOnAttach=True,
disconnectAutomatically=True,
terminateCommands=None,
postRunCommands=None,
@@ -348,6 +349,8 @@ def attach(
expectFailure=False,
gdbRemotePort=None,
gdbRemoteHostname=None,
+ sourceBreakpoints=None,
+ functionBreakpoints=None,
):
"""Build the default Makefile target, create the DAP debug adapter,
and attach to the process.
@@ -364,6 +367,28 @@ def cleanup():
self.addTearDownHook(cleanup)
# Initialize and launch the program
self.dap_server.request_initialize(sourceInitFile)
+ self.dap_server.wait_for_event("initialized")
+
+ # Set source breakpoints as part of the launch sequence.
+ if sourceBreakpoints:
+ for source_path, lines in sourceBreakpoints:
+ response = self.dap_server.request_setBreakpoints(source_path, lines)
+ self.assertTrue(
+ response["success"],
+ "setBreakpoints failed (%s)" % (response),
+ )
+
+ # Set function breakpoints as part of the launch sequence.
+ if functionBreakpoints:
+ response = self.dap_server.request_setFunctionBreakpoints(
+ functionBreakpoints
+ )
+ self.assertTrue(
+ response["success"],
+ "setFunctionBreakpoint failed (%s)" % (response),
+ )
+
+ self.dap_server.request_configurationDone()
response = self.dap_server.request_attach(
program=program,
pid=pid,
@@ -376,6 +401,7 @@ def cleanup():
attachCommands=attachCommands,
terminateCommands=terminateCommands,
coreFile=coreFile,
+ stopOnAttach=stopOnAttach,
postRunCommands=postRunCommands,
sourceMap=sourceMap,
gdbRemotePort=gdbRemotePort,
@@ -419,6 +445,8 @@ def launch(
commandEscapePrefix=None,
customFrameFormat=None,
customThreadFormat=None,
+ sourceBreakpoints=None,
+ functionBreakpoints=None,
):
"""Sending launch request to dap"""
@@ -434,6 +462,29 @@ def cleanup():
# Initialize and launch the program
self.dap_server.request_initialize(sourceInitFile)
+ self.dap_server.wait_for_event("initialized")
+
+ # Set source breakpoints as part of the launch sequence.
+ if sourceBreakpoints:
+ for source_path, lines in sourceBreakpoints:
+ response = self.dap_server.request_setBreakpoints(source_path, lines)
+ self.assertTrue(
+ response["success"],
+ "setBreakpoints failed (%s)" % (response),
+ )
+
+ # Set function breakpoints as part of the launch sequence.
+ if functionBreakpoints:
+ response = self.dap_server.request_setFunctionBreakpoints(
+ functionBreakpoints
+ )
+ self.assertTrue(
+ response["success"],
+ "setFunctionBreakpoint failed (%s)" % (response),
+ )
+
+ self.dap_server.request_configurationDone()
+
response = self.dap_server.request_launch(
program,
args=args,
@@ -504,6 +555,8 @@ def build_and_launch(
customThreadFormat=None,
launchCommands=None,
expectFailure=False,
+ sourceBreakpoints=None,
+ functionBreakpoints=None,
):
"""Build the default Makefile target, create the DAP debug adapter,
and launch the process.
@@ -540,6 +593,8 @@ def build_and_launch(
customThreadFormat=customThreadFormat,
launchCommands=launchCommands,
expectFailure=expectFailure,
+ sourceBreakpoints=sourceBreakpoints,
+ functionBreakpoints=functionBreakpoints,
)
def getBuiltinDebugServerTool(self):
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
index f48d5a7db3c50..741c011a3d692 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
@@ -27,6 +27,8 @@ def spawn_and_wait(program, delay):
@skip
class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
def set_and_hit_breakpoint(self, continueToExit=True):
+ self.dap_server.wait_for_stopped()
+
source = "main.c"
breakpoint1_line = line_number(source, "// breakpoint 1")
lines = [breakpoint1_line]
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
index 7f93b9f2a3a22..7250e67ebcd8c 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
@@ -18,17 +18,17 @@
import socket
- at skip
class TestDAP_attachByPortNum(lldbdap_testcase.DAPTestCaseBase):
default_timeout = 20
def set_and_hit_breakpoint(self, continueToExit=True):
+ self.dap_server.wait_for_stopped()
+
source = "main.c"
- main_source_path = os.path.join(os.getcwd(), source)
- breakpoint1_line = line_number(main_source_path, "// breakpoint 1")
+ breakpoint1_line = line_number(source, "// breakpoint 1")
lines = [breakpoint1_line]
# Set breakpoint in the thread function so we can step the threads
- breakpoint_ids = self.set_source_breakpoints(main_source_path, lines)
+ breakpoint_ids = self.set_source_breakpoints(source, lines)
self.assertEqual(
len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
)
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
index e5590e1b332a0..8581f10cef22a 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
@@ -81,52 +81,27 @@ def test_breakpoint_events(self):
breakpoint["verified"], "expect foo breakpoint to not be verified"
)
- # Get the stop at the entry point
- self.continue_to_next_stop()
+ # Make sure we're stopped.
+ self.dap_server.wait_for_stopped()
- # We are now stopped at the entry point to the program. Shared
- # libraries are not loaded yet (at least on macOS they aren't) and only
- # the breakpoint in the main executable should be resolved.
- self.assertEqual(len(self.dap_server.breakpoint_events), 1)
- event = self.dap_server.breakpoint_events[0]
- body = event["body"]
- self.assertEqual(
- body["reason"], "changed", "breakpoint event should say changed"
- )
- breakpoint = body["breakpoint"]
- self.assertEqual(breakpoint["id"], main_bp_id)
- self.assertTrue(breakpoint["verified"], "main breakpoint should be resolved")
-
- # Clear the list of breakpoint events so we don't see this one again.
- self.dap_server.breakpoint_events.clear()
+ # Flush the breakpoint events.
+ self.dap_server.wait_for_breakpoint_events(timeout=5)
# Continue to the breakpoint
self.continue_to_breakpoints(dap_breakpoint_ids)
- # When the process launches, we first expect to see both the main and
- # foo breakpoint as unresolved.
- for event in self.dap_server.breakpoint_events[:2]:
- body = event["body"]
- self.assertEqual(
- body["reason"], "changed", "breakpoint event should say changed"
- )
- breakpoint = body["breakpoint"]
- self.assertIn(str(breakpoint["id"]), dap_breakpoint_ids)
- self.assertFalse(breakpoint["verified"], "breakpoint should be unresolved")
+ verified_breakpoint_ids = []
+ unverified_breakpoint_ids = []
+ for breakpoint_event in self.dap_server.wait_for_breakpoint_events(timeout=5):
+ breakpoint = breakpoint_event["body"]["breakpoint"]
+ id = breakpoint["id"]
+ if breakpoint["verified"]:
+ verified_breakpoint_ids.append(id)
+ else:
+ unverified_breakpoint_ids.append(id)
- # Then, once the dynamic loader has given us a load address, they
- # should show up as resolved again.
- for event in self.dap_server.breakpoint_events[3:]:
- body = event["body"]
- self.assertEqual(
- body["reason"], "changed", "breakpoint event should say changed"
- )
- breakpoint = body["breakpoint"]
- self.assertIn(str(breakpoint["id"]), dap_breakpoint_ids)
- self.assertTrue(breakpoint["verified"], "breakpoint should be resolved")
- self.assertNotIn(
- "source",
- breakpoint,
- "breakpoint event should not return a source object",
- )
- self.assertIn("line", breakpoint, "breakpoint event should have line")
+ self.assertIn(main_bp_id, unverified_breakpoint_ids)
+ self.assertIn(foo_bp_id, unverified_breakpoint_ids)
+
+ self.assertIn(main_bp_id, verified_breakpoint_ids)
+ self.assertIn(foo_bp_id, verified_breakpoint_ids)
diff --git a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
index 210e591bff426..a94288c7a669e 100644
--- a/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
+++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
@@ -2,7 +2,6 @@
Test lldb-dap completions request
"""
-
import lldbdap_testcase
import dap_server
from lldbsuite.test import lldbutil
@@ -32,6 +31,7 @@
variable_var1_completion = {"text": "var1", "label": "var1 -- int &"}
variable_var2_completion = {"text": "var2", "label": "var2 -- int &"}
+
# Older version of libcxx produce slightly different typename strings for
# templates like vector.
@skipIf(compiler="clang", compiler_version=["<", "16.0"])
@@ -43,16 +43,22 @@ def verify_completions(self, actual_list, expected_list, not_expected_list=[]):
for not_expected_item in not_expected_list:
self.assertNotIn(not_expected_item, actual_list)
-
- def setup_debugee(self):
+ def setup_debugee(self, stopOnEntry=False):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
-
source = "main.cpp"
- breakpoint1_line = line_number(source, "// breakpoint 1")
- breakpoint2_line = line_number(source, "// breakpoint 2")
-
- self.set_source_breakpoints(source, [breakpoint1_line, breakpoint2_line])
+ self.build_and_launch(
+ program,
+ stopOnEntry=stopOnEntry,
+ sourceBreakpoints=[
+ (
+ source,
+ [
+ line_number(source, "// breakpoint 1"),
+ line_number(source, "// breakpoint 2"),
+ ],
+ ),
+ ],
+ )
def test_command_completions(self):
"""
@@ -235,7 +241,7 @@ def test_auto_completions(self):
"""
Tests completion requests in "repl-mode=auto"
"""
- self.setup_debugee()
+ self.setup_debugee(stopOnEntry=True)
res = self.dap_server.request_evaluate(
"`lldb-dap repl-mode auto", context="repl"
diff --git a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
index b07c4f871d73b..8642e317f9b3a 100644
--- a/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
+++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
@@ -19,6 +19,7 @@ def get_subprocess(root_process, process_name):
self.assertTrue(False, "No subprocess with name %s found" % process_name)
+
class TestDAP_console(lldbdap_testcase.DAPTestCaseBase):
def check_lldb_command(
self, lldb_command, contains_string, assert_msg, command_escape_prefix="`"
@@ -52,7 +53,7 @@ def test_scopes_variables_setVariable_evaluate(self):
character.
"""
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
source = "main.cpp"
breakpoint1_line = line_number(source, "// breakpoint 1")
lines = [breakpoint1_line]
@@ -81,7 +82,7 @@ def test_scopes_variables_setVariable_evaluate(self):
def test_custom_escape_prefix(self):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program, commandEscapePrefix="::")
+ self.build_and_launch(program, stopOnEntry=True, commandEscapePrefix="::")
source = "main.cpp"
breakpoint1_line = line_number(source, "// breakpoint 1")
breakpoint_ids = self.set_source_breakpoints(source, [breakpoint1_line])
@@ -96,7 +97,7 @@ def test_custom_escape_prefix(self):
def test_empty_escape_prefix(self):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program, commandEscapePrefix="")
+ self.build_and_launch(program, stopOnEntry=True, commandEscapePrefix="")
source = "main.cpp"
breakpoint1_line = line_number(source, "// breakpoint 1")
breakpoint_ids = self.set_source_breakpoints(source, [breakpoint1_line])
@@ -113,7 +114,7 @@ def test_empty_escape_prefix(self):
def test_exit_status_message_sigterm(self):
source = "main.cpp"
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program, commandEscapePrefix="")
+ self.build_and_launch(program, stopOnEntry=True, commandEscapePrefix="")
breakpoint1_line = line_number(source, "// breakpoint 1")
breakpoint_ids = self.set_source_breakpoints(source, [breakpoint1_line])
self.continue_to_breakpoints(breakpoint_ids)
@@ -167,7 +168,7 @@ def test_exit_status_message_ok(self):
def test_diagnositcs(self):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
core = self.getBuildArtifact("minidump.core")
self.yaml2obj("minidump.yaml", core)
diff --git a/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
index e367c327d4295..23500bd6fe586 100644
--- a/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
+++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
@@ -16,7 +16,9 @@ def test(self):
"""
program = self.getBuildArtifact("a.out")
self.build_and_launch(
- program, lldbDAPEnv={"LLDB_DAP_TEST_STDOUT_STDERR_REDIRECTION": ""}
+ program,
+ stopOnEntry=True,
+ lldbDAPEnv={"LLDB_DAP_TEST_STDOUT_STDERR_REDIRECTION": ""},
)
source = "main.cpp"
diff --git a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
index 0cb792d662a80..09e3f62f0eead 100644
--- a/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
+++ b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
@@ -31,7 +31,7 @@ def test_launch(self):
created.
"""
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program, disconnectAutomatically=False)
+ self.build_and_launch(program, stopOnEntry=True, disconnectAutomatically=False)
# We set a breakpoint right before the side effect file is created
self.set_source_breakpoints(
@@ -39,7 +39,11 @@ def test_launch(self):
)
self.continue_to_next_stop()
+ # verify we haven't produced the side effect file yet
+ self.assertFalse(os.path.exists(program + ".side_effect"))
+
self.dap_server.request_disconnect()
+
# verify we didn't produce the side effect file
time.sleep(1)
self.assertFalse(os.path.exists(program + ".side_effect"))
diff --git a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
index d97fda730c46a..19b682dfcd22d 100644
--- a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
+++ b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
@@ -10,6 +10,7 @@
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
+
# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
@skip
class TestDAP_evaluate(lldbdap_testcase.DAPTestCaseBase):
@@ -42,7 +43,9 @@ def run_test_evaluate_expressions(
self.context = context
program = self.getBuildArtifact("a.out")
self.build_and_launch(
- program, enableAutoVariableSummaries=enableAutoVariableSummaries
+ program,
+ enableAutoVariableSummaries=enableAutoVariableSummaries,
+ stopOnEntry=True,
)
source = "main.cpp"
self.set_source_breakpoints(
diff --git a/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py b/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
index 39d73737b7e8c..ec7387dabb0c2 100644
--- a/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
+++ b/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
@@ -2,7 +2,6 @@
Test exception behavior in DAP with signal.
"""
-
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
import lldbdap_testcase
@@ -17,7 +16,7 @@ def test_stopped_description(self):
"""
program = self.getBuildArtifact("a.out")
self.build_and_launch(program)
- self.dap_server.request_continue()
+
self.assertTrue(self.verify_stop_exception_info("signal SIGABRT"))
exceptionInfo = self.get_exceptionInfo()
self.assertEqual(exceptionInfo["breakMode"], "always")
diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
index 931456299e03e..e8e9181f8da8d 100644
--- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
+++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -15,6 +15,7 @@
# Despite the test program printing correctly. See
# https://github.com/llvm/llvm-project/issues/137599.
+
class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_default(self):
@@ -88,8 +89,8 @@ def test_stopOnEntry(self):
"""
program = self.getBuildArtifact("a.out")
self.build_and_launch(program, stopOnEntry=True)
- self.set_function_breakpoints(["main"])
- stopped_events = self.continue_to_next_stop()
+
+ stopped_events = self.dap_server.wait_for_stopped()
for stopped_event in stopped_events:
if "body" in stopped_event:
body = stopped_event["body"]
@@ -357,6 +358,7 @@ def test_commands(self):
terminateCommands = ["expr 4+2"]
self.build_and_launch(
program,
+ stopOnEntry=True,
initCommands=initCommands,
preRunCommands=preRunCommands,
postRunCommands=postRunCommands,
@@ -530,6 +532,7 @@ def test_terminate_commands(self):
terminateCommands = ["expr 4+2"]
self.launch(
program=program,
+ stopOnEntry=True,
terminateCommands=terminateCommands,
disconnectAutomatically=False,
)
diff --git a/lldb/test/API/tools/lldb-dap/progress/TestDAP_Progress.py b/lldb/test/API/tools/lldb-dap/progress/TestDAP_Progress.py
index fee63655de0da..0f94b50c31fba 100755
--- a/lldb/test/API/tools/lldb-dap/progress/TestDAP_Progress.py
+++ b/lldb/test/API/tools/lldb-dap/progress/TestDAP_Progress.py
@@ -50,7 +50,7 @@ def verify_progress_events(
@skipIfWindows
def test(self):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
progress_emitter = os.path.join(os.getcwd(), "Progress_emitter.py")
self.dap_server.request_evaluate(
f"`command script import {progress_emitter}", context="repl"
diff --git a/lldb/test/API/tools/lldb-dap/repl-mode/TestDAP_repl_mode_detection.py b/lldb/test/API/tools/lldb-dap/repl-mode/TestDAP_repl_mode_detection.py
index c6f59949d668e..81edcdf4bd0f9 100644
--- a/lldb/test/API/tools/lldb-dap/repl-mode/TestDAP_repl_mode_detection.py
+++ b/lldb/test/API/tools/lldb-dap/repl-mode/TestDAP_repl_mode_detection.py
@@ -20,7 +20,7 @@ def assertEvaluate(self, expression, regex):
def test_completions(self):
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
source = "main.cpp"
breakpoint1_line = line_number(source, "// breakpoint 1")
diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
index 36fa0bd40183f..5f95c7bfb1556 100644
--- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
@@ -22,7 +22,6 @@ def test_basic_functionality(self):
[bp_A, bp_B] = self.set_source_breakpoints("main.c", [line_A, line_B])
# Verify we hit A, then B.
- self.dap_server.request_configurationDone()
self.verify_breakpoint_hit([bp_A])
self.dap_server.request_continue()
self.verify_breakpoint_hit([bp_B])
diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
index a94c9860c1508..eed769a5a0cc6 100644
--- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
@@ -74,7 +74,6 @@ def test_stopOnEntry(self):
program = self.getBuildArtifact("a.out")
self.build_and_launch(program, runInTerminal=True, stopOnEntry=True)
[bp_main] = self.set_function_breakpoints(["main"])
- self.dap_server.request_configurationDone()
# When using stopOnEntry, configurationDone doesn't result in a running
# process; we should immediately get a stopped event instead.
diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
index ce262be161861..64cec70aa923b 100644
--- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
+++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
@@ -16,12 +16,14 @@ def test_send_event(self):
"""
program = self.getBuildArtifact("a.out")
source = "main.c"
+ breakpoint_line = line_number(source, "// breakpoint")
custom_event_body = {
"key": 321,
"arr": [True],
}
self.build_and_launch(
program,
+ sourceBreakpoints=[(source, [breakpoint_line])],
stopCommands=[
"lldb-dap send-event my-custom-event-no-body",
"lldb-dap send-event my-custom-event '{}'".format(
@@ -30,11 +32,6 @@ def test_send_event(self):
],
)
- breakpoint_line = line_number(source, "// breakpoint")
-
- self.set_source_breakpoints(source, [breakpoint_line])
- self.continue_to_next_stop()
-
custom_event = self.dap_server.wait_for_event(
filter=["my-custom-event-no-body"]
)
diff --git a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
index 4e2a76cf76980..edf4adae14a3b 100644
--- a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
+++ b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
@@ -61,7 +61,7 @@ def test_stackTrace(self):
Tests the 'stackTrace' packet and all its variants.
"""
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
source = "main.c"
self.source_path = os.path.join(os.getcwd(), source)
self.recurse_end = line_number(source, "recurse end")
diff --git a/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py b/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
index 08c225b3cada4..963d711978534 100644
--- a/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
+++ b/lldb/test/API/tools/lldb-dap/stackTraceDisassemblyDisplay/TestDAP_stackTraceDisassemblyDisplay.py
@@ -37,7 +37,7 @@ def build_and_run_until_breakpoint(self):
breakpoint_line = line_number(other_source_file, "// Break here")
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program, commandEscapePrefix="")
+ self.build_and_launch(program, stopOnEntry=True, commandEscapePrefix="")
breakpoint_ids = self.set_source_breakpoints(
other_source_file, [breakpoint_line]
diff --git a/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py b/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
index fd452d91e472b..e37cd36d7f283 100644
--- a/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
+++ b/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
@@ -2,7 +2,6 @@
Test lldb-dap start-debugging reverse requests.
"""
-
from lldbsuite.test.decorators import *
from lldbsuite.test.lldbtest import *
import lldbdap_testcase
@@ -16,7 +15,7 @@ def test_startDebugging(self):
"""
program = self.getBuildArtifact("a.out")
source = "main.c"
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
breakpoint_line = line_number(source, "// breakpoint")
diff --git a/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py b/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
index 70c11a63a79f7..7e28a5af4331c 100644
--- a/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
+++ b/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
@@ -19,7 +19,7 @@ def test_stop_hooks_before_run(self):
self.build_and_launch(program, stopOnEntry=True, preRunCommands=preRunCommands)
# The first stop is on entry.
- self.continue_to_next_stop()
+ self.dap_server.wait_for_stopped()
breakpoint_ids = self.set_function_breakpoints(["main"])
# This request hangs if the race happens, because, in that case, the
diff --git a/lldb/test/API/tools/lldb-dap/variables/children/TestDAP_variables_children.py b/lldb/test/API/tools/lldb-dap/variables/children/TestDAP_variables_children.py
index a9371e5c5fe68..eb09649f387d7 100644
--- a/lldb/test/API/tools/lldb-dap/variables/children/TestDAP_variables_children.py
+++ b/lldb/test/API/tools/lldb-dap/variables/children/TestDAP_variables_children.py
@@ -13,13 +13,13 @@ def test_get_num_children(self):
program = self.getBuildArtifact("a.out")
self.build_and_launch(
program,
+ stopOnEntry=True,
preRunCommands=[
"command script import '%s'" % self.getSourcePath("formatter.py")
],
)
source = "main.cpp"
breakpoint1_line = line_number(source, "// break here")
- lines = [breakpoint1_line]
breakpoint_ids = self.set_source_breakpoints(
source, [line_number(source, "// break here")]
@@ -47,7 +47,7 @@ def test_return_variable_with_children(self):
Test that stepping out of a function with a return value shows the children correctly
"""
program = self.getBuildArtifact("a.out")
- self.build_and_launch(program)
+ self.build_and_launch(program, stopOnEntry=True)
function_name = "test_return_variable_with_children"
breakpoint_ids = self.set_function_breakpoints([function_name])
diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp
index 4b631484c9fab..62c60cc3a9b3b 100644
--- a/lldb/tools/lldb-dap/DAP.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -84,8 +84,8 @@ DAP::DAP(Log *log, const ReplMode default_repl_mode,
: log(log), transport(transport), broadcaster("lldb-dap"),
exception_breakpoints(), focus_tid(LLDB_INVALID_THREAD_ID),
stop_at_entry(false), is_attach(false),
- restarting_process_id(LLDB_INVALID_PROCESS_ID),
- configuration_done_sent(false), waiting_for_run_in_terminal(false),
+ restarting_process_id(LLDB_INVALID_PROCESS_ID), configuration_done(false),
+ waiting_for_run_in_terminal(false),
progress_event_reporter(
[&](const ProgressEvent &event) { SendJSON(event.ToJSON()); }),
reverse_request_seq(0), repl_mode(default_repl_mode) {
@@ -893,10 +893,19 @@ llvm::Error DAP::Loop() {
return errWrapper;
}
+ // The launch sequence is special and we need to carefully handle
+ // packets in the right order. Until we've handled configurationDone,
+ // anything other than initialize, configurationDone, disconnect and
+ // breakpoint requests is deferred to the pending queue.
+ bool add_to_pending_queue = false;
+
if (const protocol::Request *req =
- std::get_if<protocol::Request>(&*next);
- req && req->command == "disconnect") {
- disconnecting = true;
+ std::get_if<protocol::Request>(&*next)) {
+ llvm::StringRef command = req->command;
+ if (command == "disconnect")
+ disconnecting = true;
+ if (!configuration_done)
+ add_to_pending_queue =
+ command != "initialize" && command != "configurationDone" &&
+ command != "disconnect" && !command.ends_with("Breakpoints");
}
const std::optional<CancelArguments> cancel_args =
@@ -924,7 +933,8 @@ llvm::Error DAP::Loop() {
{
std::lock_guard<std::mutex> guard(m_queue_mutex);
- m_queue.push_back(std::move(*next));
+ auto &queue = add_to_pending_queue ? m_pending_queue : m_queue;
+ queue.push_back(std::move(*next));
}
m_queue_cv.notify_one();
}
@@ -938,16 +948,19 @@ llvm::Error DAP::Loop() {
StopEventHandlers();
});
- while (!disconnecting || !m_queue.empty()) {
+ while (true) {
std::unique_lock<std::mutex> lock(m_queue_mutex);
m_queue_cv.wait(lock, [&] { return disconnecting || !m_queue.empty(); });
- if (m_queue.empty())
+ if (disconnecting && m_queue.empty())
break;
Message next = m_queue.front();
m_queue.pop_front();
+ // Unlock while we're processing the event.
+ lock.unlock();
+
if (!HandleObject(next))
return llvm::createStringError(llvm::inconvertibleErrorCode(),
"unhandled packet");
@@ -1219,6 +1232,16 @@ void DAP::SetConfiguration(const protocol::Configuration &config,
SetThreadFormat(*configuration.customThreadFormat);
}
+void DAP::SetConfigurationDone() {
+ {
+ std::lock_guard<std::mutex> guard(m_queue_mutex);
+ std::copy(m_pending_queue.begin(), m_pending_queue.end(),
+ std::front_inserter(m_queue));
+ configuration_done = true;
+ }
+ m_queue_cv.notify_all();
+}
+
void DAP::SetFrameFormat(llvm::StringRef format) {
if (format.empty())
return;
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h
index 88eedb0860cf1..b581ae759b1bc 100644
--- a/lldb/tools/lldb-dap/DAP.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -188,7 +188,7 @@ struct DAP {
// shutting down the entire adapter. When we're restarting, we keep the id of
// the old process here so we can detect this case and keep running.
lldb::pid_t restarting_process_id;
- bool configuration_done_sent;
+ bool configuration_done;
llvm::StringMap<std::unique_ptr<BaseRequestHandler>> request_handlers;
bool waiting_for_run_in_terminal;
ProgressEventReporter progress_event_reporter;
@@ -251,6 +251,8 @@ struct DAP {
/// Configures the debug adapter for launching/attaching.
void SetConfiguration(const protocol::Configuration &confing, bool is_attach);
+ void SetConfigurationDone();
+
/// Configure source maps based on the current `DAPConfiguration`.
void ConfigureSourceMaps();
@@ -417,8 +419,10 @@ struct DAP {
lldb::SBMutex GetAPIMutex() const { return target.GetAPIMutex(); }
private:
- std::mutex m_queue_mutex;
+ /// Queue for all incoming messages.
std::deque<protocol::Message> m_queue;
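+ /// Queue for messages deferred until configurationDone has been processed.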
+ std::deque<protocol::Message> m_pending_queue;
+ std::mutex m_queue_mutex;
std::condition_variable m_queue_cv;
std::mutex m_cancelled_requests_mutex;
diff --git a/lldb/tools/lldb-dap/EventHelper.cpp b/lldb/tools/lldb-dap/EventHelper.cpp
index 2c659f39f4b66..ed2d8700c26b0 100644
--- a/lldb/tools/lldb-dap/EventHelper.cpp
+++ b/lldb/tools/lldb-dap/EventHelper.cpp
@@ -222,7 +222,7 @@ void SendContinuedEvent(DAP &dap) {
// If the focus thread is not set then we haven't reported any thread status
// to the client, so nothing to report.
- if (!dap.configuration_done_sent || dap.focus_tid == LLDB_INVALID_THREAD_ID) {
+ if (!dap.configuration_done || dap.focus_tid == LLDB_INVALID_THREAD_ID) {
return;
}
diff --git a/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp
index 7a0f091128e4a..5dc9c3f9772e3 100644
--- a/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/AttachRequestHandler.cpp
@@ -133,61 +133,70 @@ void AttachRequestHandler::operator()(const llvm::json::Object &request) const {
dap.SendOutput(OutputType::Console,
llvm::StringRef(attach_msg, attach_msg_len));
}
- if (attachCommands.empty()) {
- // No "attachCommands", just attach normally.
- // Disable async events so the attach will be successful when we return from
- // the launch call and the launch will happen synchronously
+ {
+ // Perform the attach in synchronous mode so that we don't have to worry
+ // about process state changes during the attach.
ScopeSyncMode scope_sync_mode(dap.debugger);
-
- if (core_file.empty()) {
- if ((pid != LLDB_INVALID_PROCESS_ID) &&
- (gdb_remote_port != invalid_port)) {
- // If both pid and port numbers are specified.
- error.SetErrorString("The user can't specify both pid and port");
- } else if (gdb_remote_port != invalid_port) {
- // If port is specified and pid is not.
- lldb::SBListener listener = dap.debugger.GetListener();
-
- // If the user hasn't provided the hostname property, default localhost
- // being used.
- std::string connect_url =
- llvm::formatv("connect://{0}:", gdb_remote_hostname);
- connect_url += std::to_string(gdb_remote_port);
- dap.target.ConnectRemote(listener, connect_url.c_str(), "gdb-remote",
- error);
+ if (attachCommands.empty()) {
+ // No "attachCommands", just attach normally.
+ if (core_file.empty()) {
+ if ((pid != LLDB_INVALID_PROCESS_ID) &&
+ (gdb_remote_port != invalid_port)) {
+ // If both pid and port numbers are specified.
+ error.SetErrorString("The user can't specify both pid and port");
+ } else if (gdb_remote_port != invalid_port) {
+ // If port is specified and pid is not.
+ lldb::SBListener listener = dap.debugger.GetListener();
+
+ // If the user hasn't provided the hostname property, default to
+ // using localhost.
+ std::string connect_url =
+ llvm::formatv("connect://{0}:", gdb_remote_hostname);
+ connect_url += std::to_string(gdb_remote_port);
+ dap.target.ConnectRemote(listener, connect_url.c_str(), "gdb-remote",
+ error);
+ } else {
+ // Attach by pid or process name.
+ lldb::SBAttachInfo attach_info;
+ if (pid != LLDB_INVALID_PROCESS_ID)
+ attach_info.SetProcessID(pid);
+ else if (dap.configuration.program.has_value())
+ attach_info.SetExecutable(dap.configuration.program->data());
+ attach_info.SetWaitForLaunch(wait_for, false /*async*/);
+ dap.target.Attach(attach_info, error);
+ }
} else {
- // Attach by pid or process name.
- lldb::SBAttachInfo attach_info;
- if (pid != LLDB_INVALID_PROCESS_ID)
- attach_info.SetProcessID(pid);
- else if (dap.configuration.program.has_value())
- attach_info.SetExecutable(dap.configuration.program->data());
- attach_info.SetWaitForLaunch(wait_for, false /*async*/);
- dap.target.Attach(attach_info, error);
+ dap.target.LoadCore(core_file.data(), error);
}
} else {
- dap.target.LoadCore(core_file.data(), error);
- }
- } else {
- // We have "attachCommands" that are a set of commands that are expected
- // to execute the commands after which a process should be created. If there
- // is no valid process after running these commands, we have failed.
- if (llvm::Error err = dap.RunAttachCommands(attachCommands)) {
- response["success"] = false;
- EmplaceSafeString(response, "message", llvm::toString(std::move(err)));
- dap.SendJSON(llvm::json::Value(std::move(response)));
- return;
+ // "attachCommands" is a set of commands that are expected to be run,
+ // after which a process should have been created. If there is no valid
+ // process after running these commands, we have failed.
+ if (llvm::Error err = dap.RunAttachCommands(attachCommands)) {
+ response["success"] = false;
+ EmplaceSafeString(response, "message", llvm::toString(std::move(err)));
+ dap.SendJSON(llvm::json::Value(std::move(response)));
+ return;
+ }
+ // The custom commands might have created a new target so we should use
+ // the selected target after these commands are run.
+ dap.target = dap.debugger.GetSelectedTarget();
}
- // The custom commands might have created a new target so we should use the
- // selected target after these commands are run.
- dap.target = dap.debugger.GetSelectedTarget();
-
- // Make sure the process is attached and stopped before proceeding as the
- // the launch commands are not run using the synchronous mode.
- error = dap.WaitForProcessToStop(std::chrono::seconds(timeout_seconds));
}
+ // Make sure the process is attached and stopped.
+ error = dap.WaitForProcessToStop(std::chrono::seconds(timeout_seconds));
+
+ // Clients request the baseline of currently existing threads after a
+ // successful launch or attach by sending a 'threads' request right after
+ // receiving the configurationDone response. Obtain the list of threads
+ // before we resume the process.
+ dap.initial_thread_list =
+ GetThreads(dap.target.GetProcess(), dap.thread_format);
+
if (error.Success() && core_file.empty()) {
auto attached_pid = dap.target.GetProcess().GetProcessID();
if (attached_pid == LLDB_INVALID_PROCESS_ID) {
@@ -206,9 +215,17 @@ void AttachRequestHandler::operator()(const llvm::json::Object &request) const {
}
dap.SendJSON(llvm::json::Value(std::move(response)));
+
+ // FIXME: Move this into PostRun.
if (error.Success()) {
- SendProcessEvent(dap, Attach);
- dap.SendJSON(CreateEventObject("initialized"));
+ if (dap.target.GetProcess().IsValid()) {
+ SendProcessEvent(dap, Attach);
+
+ if (dap.stop_at_entry)
+ SendThreadStoppedEvent(dap);
+ else
+ dap.target.GetProcess().Continue();
+ }
}
}
diff --git a/lldb/tools/lldb-dap/Handler/ConfigurationDoneRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/ConfigurationDoneRequestHandler.cpp
index f39bbdefdbb95..802c28d7b8904 100644
--- a/lldb/tools/lldb-dap/Handler/ConfigurationDoneRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/ConfigurationDoneRequestHandler.cpp
@@ -47,21 +47,11 @@ namespace lldb_dap {
void ConfigurationDoneRequestHandler::operator()(
const llvm::json::Object &request) const {
+ dap.SetConfigurationDone();
+
llvm::json::Object response;
FillResponse(request, response);
dap.SendJSON(llvm::json::Value(std::move(response)));
- dap.configuration_done_sent = true;
- if (dap.stop_at_entry)
- SendThreadStoppedEvent(dap);
- else {
- // Client requests the baseline of currently existing threads after
- // a successful launch or attach by sending a 'threads' request
- // right after receiving the configurationDone response.
- // Obtain the list of threads before we resume the process
- dap.initial_thread_list =
- GetThreads(dap.target.GetProcess(), dap.thread_format);
- dap.target.GetProcess().Continue();
- }
}
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp
index ce34c52bcc334..aa947d3cb5ab9 100644
--- a/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/InitializeRequestHandler.cpp
@@ -140,43 +140,28 @@ static void EventThreadFunction(DAP &dap) {
lldb::SBProcess process = lldb::SBProcess::GetProcessFromEvent(event);
if (event_mask & lldb::SBProcess::eBroadcastBitStateChanged) {
auto state = lldb::SBProcess::GetStateFromEvent(event);
+
+ DAP_LOG(dap.log, "State = {0}", state);
switch (state) {
+ case lldb::eStateConnected:
+ case lldb::eStateDetached:
case lldb::eStateInvalid:
- // Not a state event
- break;
case lldb::eStateUnloaded:
break;
- case lldb::eStateConnected:
- break;
case lldb::eStateAttaching:
- break;
- case lldb::eStateLaunching:
- break;
- case lldb::eStateStepping:
- break;
case lldb::eStateCrashed:
- break;
- case lldb::eStateDetached:
- break;
- case lldb::eStateSuspended:
- break;
+ case lldb::eStateLaunching:
case lldb::eStateStopped:
- // We launch and attach in synchronous mode then the first stop
- // event will not be delivered. If we use "launchCommands" during a
- // launch or "attachCommands" during an attach we might some process
- // stop events which we do not want to send an event for. We will
- // manually send a stopped event in request_configurationDone(...)
- // so don't send any before then.
- if (dap.configuration_done_sent) {
- // Only report a stopped event if the process was not
- // automatically restarted.
- if (!lldb::SBProcess::GetRestartedFromEvent(event)) {
- SendStdOutStdErr(dap, process);
- SendThreadStoppedEvent(dap);
- }
+ case lldb::eStateSuspended:
+ // Only report a stopped event if the process was not
+ // automatically restarted.
+ if (!lldb::SBProcess::GetRestartedFromEvent(event)) {
+ SendStdOutStdErr(dap, process);
+ SendThreadStoppedEvent(dap);
}
break;
case lldb::eStateRunning:
+ case lldb::eStateStepping:
dap.WillContinue();
SendContinuedEvent(dap);
break;
@@ -284,6 +269,7 @@ llvm::Expected<InitializeResponseBody> InitializeRequestHandler::Run(
// Do not source init files until in/out/err are configured.
dap.debugger = lldb::SBDebugger::Create(false);
dap.debugger.SetInputFile(dap.in);
+ dap.target = dap.debugger.GetDummyTarget();
llvm::Expected<int> out_fd = dap.out.GetWriteFileDescriptor();
if (!out_fd)
@@ -338,4 +324,8 @@ llvm::Expected<InitializeResponseBody> InitializeRequestHandler::Run(
return dap.GetCapabilities();
}
+void InitializeRequestHandler::PostRun() const {
+ dap.SendJSON(CreateEventObject("initialized"));
+}
+
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp
index 3e4532e754ec6..7e0e76935dd02 100644
--- a/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/LaunchRequestHandler.cpp
@@ -71,9 +71,12 @@ void LaunchRequestHandler::PostRun() const {
if (dap.target.GetProcess().IsValid()) {
// Attach happens when launching with runInTerminal.
SendProcessEvent(dap, dap.is_attach ? Attach : Launch);
- }
- dap.SendJSON(CreateEventObject("initialized"));
+ if (dap.stop_at_entry)
+ SendThreadStoppedEvent(dap);
+ else
+ dap.target.GetProcess().Continue();
+ }
}
} // namespace lldb_dap
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
index 7a75cd93abc19..282c5f4ab15a5 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.cpp
@@ -8,6 +8,7 @@
#include "Handler/RequestHandler.h"
#include "DAP.h"
+#include "EventHelper.h"
#include "Handler/ResponseHandler.h"
#include "JSONUtils.h"
#include "LLDBUtils.h"
@@ -162,7 +163,7 @@ RunInTerminal(DAP &dap, const protocol::LaunchRequestArguments &arguments) {
dap.target.GetProcess().Continue();
// Now that the actual target is just starting (i.e. exec was just invoked),
- // we return the debugger to its async state.
+ // we return the debugger to its sync state.
scope_sync_mode.reset();
// If sending the notification failed, the launcher should be dead by now and
@@ -238,35 +239,47 @@ llvm::Error BaseRequestHandler::LaunchProcess(
launch_info.SetLaunchFlags(flags | lldb::eLaunchFlagDebug |
lldb::eLaunchFlagStopAtEntry);
- if (arguments.runInTerminal) {
- if (llvm::Error err = RunInTerminal(dap, arguments))
- return err;
- } else if (launchCommands.empty()) {
- lldb::SBError error;
- // Disable async events so the launch will be successful when we return from
- // the launch call and the launch will happen synchronously
+ {
+ // Perform the launch in synchronous mode so that we don't have to worry
+ // about process state changes during the launch.
ScopeSyncMode scope_sync_mode(dap.debugger);
- dap.target.Launch(launch_info, error);
- if (error.Fail())
- return llvm::make_error<DAPError>(error.GetCString());
- } else {
- // Set the launch info so that run commands can access the configured
- // launch details.
- dap.target.SetLaunchInfo(launch_info);
- if (llvm::Error err = dap.RunLaunchCommands(launchCommands))
- return err;
-
- // The custom commands might have created a new target so we should use the
- // selected target after these commands are run.
- dap.target = dap.debugger.GetSelectedTarget();
- // Make sure the process is launched and stopped at the entry point before
- // proceeding as the launch commands are not run using the synchronous
- // mode.
- lldb::SBError error = dap.WaitForProcessToStop(arguments.timeout);
- if (error.Fail())
- return llvm::make_error<DAPError>(error.GetCString());
+
+ if (arguments.runInTerminal) {
+ if (llvm::Error err = RunInTerminal(dap, arguments))
+ return err;
+ } else if (launchCommands.empty()) {
+ lldb::SBError error;
+ dap.target.Launch(launch_info, error);
+ if (error.Fail())
+ return llvm::make_error<DAPError>(error.GetCString());
+ } else {
+ // Set the launch info so that run commands can access the configured
+ // launch details.
+ dap.target.SetLaunchInfo(launch_info);
+ if (llvm::Error err = dap.RunLaunchCommands(launchCommands))
+ return err;
+
+ // The custom commands might have created a new target so we should use
+ // the selected target after these commands are run.
+ dap.target = dap.debugger.GetSelectedTarget();
+ }
}
+ // Make sure the process is launched and stopped at the entry point before
+ // proceeding.
+ lldb::SBError error = dap.WaitForProcessToStop(arguments.timeout);
+ if (error.Fail())
+ return llvm::make_error<DAPError>(error.GetCString());
+
+ // Clients request the baseline of currently existing threads after a
+ // successful launch or attach by sending a 'threads' request right after
+ // receiving the configurationDone response. Obtain the list of threads
+ // before we resume the process.
+ dap.initial_thread_list =
+ GetThreads(dap.target.GetProcess(), dap.thread_format);
+
return llvm::Error::success();
}
diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h
index 37cc902e1c98e..9e9cfb13d77b8 100644
--- a/lldb/tools/lldb-dap/Handler/RequestHandler.h
+++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h
@@ -282,6 +282,7 @@ class InitializeRequestHandler
static llvm::StringLiteral GetCommand() { return "initialize"; }
llvm::Expected<protocol::InitializeResponseBody>
Run(const protocol::InitializeRequestArguments &args) const override;
+ void PostRun() const override;
};
class LaunchRequestHandler
>From 83e1f34c1c8c7b5d2c53a45dfb44cf3766f7c345 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Wed, 7 May 2025 16:24:02 -0700
Subject: [PATCH 066/115] [clang-doc][NFC] Add TODO for future work (#138052)
---
clang-tools-extra/clang-doc/Representation.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index 1d5c4dcaeaf37..a2f8701091baa 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -60,6 +60,7 @@ struct CommentInfo {
// the vector.
bool operator<(const CommentInfo &Other) const;
+ // TODO: The Kind field should be an enum, so we can switch on it easily.
SmallString<16>
Kind; // Kind of comment (FullComment, ParagraphComment, TextComment,
// InlineCommandComment, HTMLStartTagComment, HTMLEndTagComment,
>From 59a73bdbc4c7b99a4b82ecd0d246267815699077 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Wed, 7 May 2025 16:24:27 -0700
Subject: [PATCH 067/115] [clang-doc] Add missing comment for Base (#138053)
---
clang-tools-extra/clang-doc/Representation.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index a2f8701091baa..9e4484ceb70a8 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -535,6 +535,7 @@ struct ClangDocContext {
std::vector<std::string> UserStylesheets;
// JavaScript files that will be imported in all HTML files.
std::vector<std::string> JsScripts;
+ // Base directory for remote repositories.
StringRef Base;
Index Idx;
};
>From 2040f50a566e19b9bb30baf75f15199d1aba18b2 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 03:17:16 +0000
Subject: [PATCH 068/115] [JITLink][i386] Improve unsupported relocation error
message.
---
llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
index a703b77e7c50b..1273dc1481c35 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -114,7 +114,7 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
private:
using ELFT = object::ELF32LE;
- static Expected<i386::EdgeKind_i386> getRelocationKind(const uint32_t Type) {
+ Expected<i386::EdgeKind_i386> getRelocationKind(const uint32_t Type) {
using namespace i386;
switch (Type) {
case ELF::R_386_NONE:
@@ -137,8 +137,9 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
return EdgeKind_i386::BranchPCRel32;
}
- return make_error<JITLinkError>("Unsupported i386 relocation:" +
- formatv("{0:d}", Type));
+ return make_error<JITLinkError>(
+ "In " + G->getName() + ": Unsupported i386 relocation type " +
+ object::getELFRelocationTypeName(ELF::EM_386, Type));
}
Error addRelocations() override {
>From 45b5cc08e5823c59802f88ec3f27108ab98c1eb9 Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Thu, 8 May 2025 01:41:57 +0200
Subject: [PATCH 069/115] [Clang] Fix the warning group of several compatibility
diagnostics (#138872)
There are a few diagnostics that are incorrectly grouped under
`-Wc++20-compat` instead of `-Wpre-c++20-compat`.
I grepped for any remaining `-Wc++xy-compat` diagnostics, but they all
seem to actually be about compatibility with C++XY.
Fixes #138775.
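As a quick illustration (a sketch mirroring the `clang/test/SemaCXX/gh138775.cpp` test added below; the exact driver invocation is an assumption based on the group names), the regrouped diagnostics now fire under `-Wpre-c++20-compat` rather than `-Wc++20-compat` when compiling as C++20:
```c++
// Assumed invocation: clang++ -std=c++20 -fsyntax-only -Wpre-c++20-compat example.cpp
// warns: 'consteval' specifier is incompatible with C++ standards before C++20
// warns: missing 'typename' prior to dependent type name 'T::type' is
//        incompatible with C++ standards before C++20
template <typename T> consteval T::type f();

// warns: 'constinit' specifier is incompatible with C++ standards before C++20
constinit int x = 4;
```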
---
clang/docs/ReleaseNotes.rst | 3 +++
.../clang/Basic/DiagnosticCommonKinds.td | 8 ++++---
.../clang/Basic/DiagnosticSemaKinds.td | 22 ++++++++-----------
clang/lib/Sema/SemaDecl.cpp | 11 +++++-----
clang/lib/Sema/SemaTemplate.cpp | 10 ++++-----
clang/test/CXX/drs/cwg1xx.cpp | 4 ++--
clang/test/CXX/drs/cwg2xx.cpp | 18 +++++++--------
clang/test/CXX/drs/cwg4xx.cpp | 2 +-
clang/test/CXX/drs/cwg5xx.cpp | 6 ++---
.../temp.res/temp.dep/temp.dep.type/p1.cpp | 2 +-
clang/test/FixIt/fixit.cpp | 2 +-
clang/test/SemaCXX/MicrosoftCompatibility.cpp | 12 +++++-----
clang/test/SemaCXX/MicrosoftExtensions.cpp | 2 +-
clang/test/SemaCXX/MicrosoftSuper.cpp | 8 +++----
clang/test/SemaCXX/gh138775.cpp | 14 ++++++++++++
clang/test/SemaCXX/rounding-math-crash.cpp | 2 +-
clang/test/SemaCXX/unknown-type-name.cpp | 16 +++++++-------
.../SemaTemplate/typename-specifier-3.cpp | 2 +-
18 files changed, 78 insertions(+), 66 deletions(-)
create mode 100644 clang/test/SemaCXX/gh138775.cpp
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9c4ab80537ac9..4c25d6d4d515a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -505,6 +505,9 @@ Improvements to Clang's diagnostics
- ``-Wreserved-identifier`` now fires on reserved parameter names in a function
declaration which is not a definition.
+- Several compatibility diagnostics that were incorrectly being grouped under
+ ``-Wc++20-compat`` are now part of ``-Wpre-c++20-compat``. (#GH138775)
+
Improvements to Clang's time-trace
----------------------------------
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index f26c906b46447..e4d94fefbbf3d 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -130,9 +130,11 @@ def err_attribute_not_type_attr : Error<
"%0%select{ attribute|}1 cannot be applied to types">;
def err_enum_template : Error<"enumeration cannot be a template">;
-def warn_cxx20_compat_consteval : Warning<
- "'consteval' specifier is incompatible with C++ standards before C++20">,
- InGroup<CXX20Compat>, DefaultIgnore;
+def warn_cxx20_compat_consteval
+ : Warning<"'consteval' specifier is incompatible with C++ standards before "
+ "C++20">,
+ InGroup<CXXPre20Compat>,
+ DefaultIgnore;
def warn_missing_type_specifier : Warning<
"type specifier missing, defaults to 'int'">,
InGroup<ImplicitInt>, DefaultIgnore;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 195e1202880b9..e1b9ed0647bb9 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -51,6 +51,8 @@ defm adl_only_template_id : CXX20Compat<
"with explicit template arguments is">;
defm ctad_for_alias_templates
: CXX20Compat<"class template argument deduction for alias templates is">;
+defm implicit_typename
+ : CXX20Compat<"missing 'typename' prior to dependent type name %0 is">;
// C++23 compatibility with C++20 and earlier.
defm constexpr_static_var : CXX23Compat<
@@ -5867,16 +5869,8 @@ def ext_typename_missing
def err_typename_refers_to_using_value_decl : Error<
"typename specifier refers to a dependent using declaration for a value "
"%0 in %1">;
-def note_using_value_decl_missing_typename : Note<
- "add 'typename' to treat this using declaration as a type">;
-def warn_cxx17_compat_implicit_typename : Warning<"use of implicit 'typename' is "
- "incompatible with C++ standards before C++20">, InGroup<CXX20Compat>,
- DefaultIgnore;
-def ext_implicit_typename
- : ExtWarn<"missing 'typename' prior to dependent "
- "type name %0; implicit 'typename' is a C++20 extension">,
- InGroup<CXX20>;
-
+def note_using_value_decl_missing_typename
+ : Note<"add 'typename' to treat this using declaration as a type">;
def err_template_kw_refers_to_non_template : Error<
"%0%select{| following the 'template' keyword}1 "
"does not refer to a template">;
@@ -9572,9 +9566,11 @@ def err_incomplete_type_used_in_type_trait_expr : Error<
"incomplete type %0 used in type trait expression">, NoSFINAE;
// C++20 constinit and require_constant_initialization attribute
-def warn_cxx20_compat_constinit : Warning<
- "'constinit' specifier is incompatible with C++ standards before C++20">,
- InGroup<CXX20Compat>, DefaultIgnore;
+def warn_cxx20_compat_constinit
+ : Warning<"'constinit' specifier is incompatible with C++ standards before "
+ "C++20">,
+ InGroup<CXXPre20Compat>,
+ DefaultIgnore;
def err_constinit_local_variable : Error<
"local variable cannot be declared 'constinit'">;
def err_require_constant_init_failed : Error<
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 6b561d7bfc6e7..5a45198a7ce02 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -349,12 +349,11 @@ ParsedType Sema::getTypeName(const IdentifierInfo &II, SourceLocation NameLoc,
if (AllowImplicitTypename == ImplicitTypenameContext::No)
return nullptr;
SourceLocation QualifiedLoc = SS->getRange().getBegin();
- if (getLangOpts().CPlusPlus20)
- Diag(QualifiedLoc, diag::warn_cxx17_compat_implicit_typename);
- else
- Diag(QualifiedLoc, diag::ext_implicit_typename)
- << NestedNameSpecifier::Create(Context, SS->getScopeRep(), &II)
- << FixItHint::CreateInsertion(QualifiedLoc, "typename ");
+ auto DB =
+ DiagCompat(QualifiedLoc, diag_compat::implicit_typename)
+ << NestedNameSpecifier::Create(Context, SS->getScopeRep(), &II);
+ if (!getLangOpts().CPlusPlus20)
+ DB << FixItHint::CreateInsertion(QualifiedLoc, "typename ");
}
// We know from the grammar that this name refers to a type,
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 65706d4b15455..94f4c1c46c1fb 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -3775,12 +3775,10 @@ TypeResult Sema::ActOnTemplateIdType(
NestedNameSpecifier *NNS =
NestedNameSpecifier::Create(Context, SS.getScopeRep(), TemplateII);
if (AllowImplicitTypename == ImplicitTypenameContext::Yes) {
- if (getLangOpts().CPlusPlus20)
- Diag(SS.getBeginLoc(), diag::warn_cxx17_compat_implicit_typename);
- else
- Diag(SS.getBeginLoc(), diag::ext_implicit_typename)
- << NNS
- << FixItHint::CreateInsertion(SS.getBeginLoc(), "typename ");
+ auto DB = DiagCompat(SS.getBeginLoc(), diag_compat::implicit_typename)
+ << NNS;
+ if (!getLangOpts().CPlusPlus20)
+ DB << FixItHint::CreateInsertion(SS.getBeginLoc(), "typename ");
} else
Diag(SS.getBeginLoc(), diag::err_typename_missing_template) << NNS;
diff --git a/clang/test/CXX/drs/cwg1xx.cpp b/clang/test/CXX/drs/cwg1xx.cpp
index 6b9ad31bffbcd..8b84de0ab5a9a 100644
--- a/clang/test/CXX/drs/cwg1xx.cpp
+++ b/clang/test/CXX/drs/cwg1xx.cpp
@@ -96,7 +96,7 @@ namespace cwg108 { // cwg108: 2.9
template<typename T> struct A {
struct B { typedef int X; };
B::X x;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B::X'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B::X' is a C++20 extension}}
struct C : B { X x; };
// expected-error at -1 {{unknown type name 'X'}}
};
@@ -321,7 +321,7 @@ namespace cwg121 { // cwg121: 2.7
X::Y<T> x;
T::Y<T> y;
// expected-error at -1 {{use 'template' keyword to treat 'Y' as a dependent template name}}
- // cxx98-17-error at -2 {{missing 'typename' prior to dependent type name 'T::Y'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -2 {{missing 'typename' prior to dependent type name 'T::Y' is a C++20 extension}}
};
Z<X> z;
} // namespace cwg121
diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp
index b2ae8f88ead74..a53a8d1ed64a8 100644
--- a/clang/test/CXX/drs/cwg2xx.cpp
+++ b/clang/test/CXX/drs/cwg2xx.cpp
@@ -426,7 +426,7 @@ namespace cwg224 { // cwg224: 16
A::type a;
A<T>::type b;
A<T*>::type c;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::type' is a C++20 extension}}
::cwg224::example1::A<T>::type d;
class B {
@@ -435,13 +435,13 @@ namespace cwg224 { // cwg224: 16
A::type a;
A<T>::type b;
A<T*>::type c;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::type' is a C++20 extension}}
::cwg224::example1::A<T>::type d;
B::type e;
A<T>::B::type f;
A<T*>::B::type g;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::B::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::B::type' is a C++20 extension}}
typename A<T*>::B::type h;
};
};
@@ -450,25 +450,25 @@ namespace cwg224 { // cwg224: 16
typedef int type;
A<T*>::type a;
A<T>::type b;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T>::type' is a C++20 extension}}
};
template <class T1, class T2, int I> struct B {
typedef int type;
B<T1, T2, I>::type b1;
B<T2, T1, I>::type b2;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<T2, T1, I>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<T2, T1, I>::type' is a C++20 extension}}
typedef T1 my_T1;
static const int my_I = I;
static const int my_I2 = I+0;
static const int my_I3 = my_I;
B<my_T1, T2, my_I>::type b3;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I>::type' is a C++20 extension}}
B<my_T1, T2, my_I2>::type b4;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I2>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I2>::type' is a C++20 extension}}
B<my_T1, T2, my_I3>::type b5;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I3>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'B<my_T1, T2, my_I3>::type' is a C++20 extension}}
};
}
@@ -480,7 +480,7 @@ namespace cwg224 { // cwg224: 16
X<A::i, char>::type x;
X<A<T>::i, double>::type y;
X<A<T*>::i, long>::type z;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<A<T *>::i, long>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<A<T *>::i, long>::type' is a C++20 extension}}
int f();
};
template <class T> int A<T>::f() {
diff --git a/clang/test/CXX/drs/cwg4xx.cpp b/clang/test/CXX/drs/cwg4xx.cpp
index e8e2600870233..210f7ae71ec04 100644
--- a/clang/test/CXX/drs/cwg4xx.cpp
+++ b/clang/test/CXX/drs/cwg4xx.cpp
@@ -257,7 +257,7 @@ namespace cwg409 { // cwg409: 2.7
A::B b2;
A<T>::B b3;
A<T*>::B b4;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::B'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'A<T *>::B' is a C++20 extension}}
};
} // namespace cwg409
diff --git a/clang/test/CXX/drs/cwg5xx.cpp b/clang/test/CXX/drs/cwg5xx.cpp
index 0825b52653b4d..1d505adecfb27 100644
--- a/clang/test/CXX/drs/cwg5xx.cpp
+++ b/clang/test/CXX/drs/cwg5xx.cpp
@@ -254,9 +254,9 @@ namespace cwg526 { // cwg526: 2.7
typedef int type;
X<N>::type v1;
X<(N)>::type v2;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<(N)>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<(N)>::type' is a C++20 extension}}
X<+N>::type v3;
- // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<+N>::type'; implicit 'typename' is a C++20 extension}}
+ // cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'X<+N>::type' is a C++20 extension}}
};
} // namespace cwg526
@@ -783,7 +783,7 @@ struct Outer {
};
template <class T>
Outer<T>::Inner* Outer<T>::Inner::self() { return this; }
-// cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'Outer<T>::Inner'; implicit 'typename' is a C++20 extension}}
+// cxx98-17-error at -1 {{missing 'typename' prior to dependent type name 'Outer<T>::Inner' is a C++20 extension}}
} // namespace cwg560
diff --git a/clang/test/CXX/temp/temp.res/temp.dep/temp.dep.type/p1.cpp b/clang/test/CXX/temp/temp.res/temp.dep/temp.dep.type/p1.cpp
index 910dab11ee5e1..acaeea9e70e3f 100644
--- a/clang/test/CXX/temp/temp.res/temp.dep/temp.dep.type/p1.cpp
+++ b/clang/test/CXX/temp/temp.res/temp.dep/temp.dep.type/p1.cpp
@@ -17,7 +17,7 @@ namespace Example1 {
template<class T> struct A<A<A<T>>> {
struct C {};
- B<B<T>>::C bc; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ B<B<T>>::C bc; // expected-warning {{missing 'typename' prior to dependent type name 'B<B<T>>::C' is a C++20 extension}}
};
}
diff --git a/clang/test/FixIt/fixit.cpp b/clang/test/FixIt/fixit.cpp
index 605c2d0bd0235..3e5040969c3ee 100644
--- a/clang/test/FixIt/fixit.cpp
+++ b/clang/test/FixIt/fixit.cpp
@@ -211,7 +211,7 @@ struct MoreAccidentalCommas {
template<class T> struct Mystery;
template<class T> typedef Mystery<T>::type getMysteriousThing() { // \
expected-error {{function definition declared 'typedef'}} \
- expected-warning {{implicit 'typename' is a C++20 extension}}
+ expected-warning {{missing 'typename' prior to dependent type name 'Mystery<T>::type' is a C++20 extension}}
return Mystery<T>::get();
}
diff --git a/clang/test/SemaCXX/MicrosoftCompatibility.cpp b/clang/test/SemaCXX/MicrosoftCompatibility.cpp
index a830883280173..b8cd22ad350a5 100644
--- a/clang/test/SemaCXX/MicrosoftCompatibility.cpp
+++ b/clang/test/SemaCXX/MicrosoftCompatibility.cpp
@@ -211,14 +211,14 @@ class C : private A<T>, public B<U> {
typedef B<U> Base2;
typedef A<U> Base3;
- A<T>::TYPE a1; // expected-warning {{implicit 'typename' is a C++20 extension}}
- Base1::TYPE a2; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ A<T>::TYPE a1; // expected-warning {{missing 'typename' prior to dependent type name 'A<T>::TYPE' is a C++20 extension}}
+ Base1::TYPE a2; // expected-warning {{missing 'typename' prior to dependent type name 'Base1::TYPE' is a C++20 extension}}
- B<U>::TYPE a3; // expected-warning {{implicit 'typename' is a C++20 extension}}
- Base2::TYPE a4; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ B<U>::TYPE a3; // expected-warning {{missing 'typename' prior to dependent type name 'B<U>::TYPE' is a C++20 extension}}
+ Base2::TYPE a4; // expected-warning {{missing 'typename' prior to dependent type name 'Base2::TYPE' is a C++20 extension}}
- A<U>::TYPE a5; // expected-warning {{implicit 'typename' is a C++20 extension}}
- Base3::TYPE a6; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ A<U>::TYPE a5; // expected-warning {{missing 'typename' prior to dependent type name 'A<U>::TYPE' is a C++20 extension}}
+ Base3::TYPE a6; // expected-warning {{missing 'typename' prior to dependent type name 'Base3::TYPE' is a C++20 extension}}
};
class D {
diff --git a/clang/test/SemaCXX/MicrosoftExtensions.cpp b/clang/test/SemaCXX/MicrosoftExtensions.cpp
index 7454a01158f6b..4dff2b1c362a7 100644
--- a/clang/test/SemaCXX/MicrosoftExtensions.cpp
+++ b/clang/test/SemaCXX/MicrosoftExtensions.cpp
@@ -613,7 +613,7 @@ typedef char __unaligned *aligned_type; // expected-error {{expected ';' after t
namespace PR32750 {
template<typename T> struct A {};
-template<typename T> struct B : A<A<T>> { A<T>::C::D d; }; // expected-warning {{implicit 'typename' is a C++20 extension}}
+template<typename T> struct B : A<A<T>> { A<T>::C::D d; }; // expected-warning {{missing 'typename' prior to dependent type name 'A<T>::C::D' is a C++20 extension}}
}
#endif
diff --git a/clang/test/SemaCXX/MicrosoftSuper.cpp b/clang/test/SemaCXX/MicrosoftSuper.cpp
index 94e29b23ef11c..d117b93523363 100644
--- a/clang/test/SemaCXX/MicrosoftSuper.cpp
+++ b/clang/test/SemaCXX/MicrosoftSuper.cpp
@@ -108,8 +108,8 @@ struct DerivedFromDependentBase : BaseTemplate<T> {
typename __super::XXX a;
typedef typename __super::XXX b;
- __super::XXX c; // expected-warning {{implicit 'typename' is a C++20 extension}}
- typedef __super::XXX d; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ __super::XXX c; // expected-warning {{missing 'typename'}}
+ typedef __super::XXX d; // expected-warning {{missing 'typename'}}
void foo() {
typename __super::XXX e;
@@ -127,8 +127,8 @@ struct DerivedFromTemplateParameter : T {
typename __super::XXX a;
typedef typename __super::XXX b;
- __super::XXX c; // expected-warning {{implicit 'typename' is a C++20 extension}}
- typedef __super::XXX d; // expected-warning {{implicit 'typename' is a C++20 extension}}
+ __super::XXX c; // expected-warning {{missing 'typename'}}
+ typedef __super::XXX d; // expected-warning {{missing 'typename'}}
void foo() {
typename __super::XXX e;
diff --git a/clang/test/SemaCXX/gh138775.cpp b/clang/test/SemaCXX/gh138775.cpp
new file mode 100644
index 0000000000000..854e25f84fe49
--- /dev/null
+++ b/clang/test/SemaCXX/gh138775.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify=cxx17 %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=pre-cxx20-compat -Wpre-c++20-compat %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=cxx20-compat -Wc++20-compat %s
+// cxx20-compat-no-diagnostics
+
+// cxx17-error at +4 {{unknown type name 'consteval'; did you mean 'constexpr'}}
+// cxx17-warning at +3 {{missing 'typename' prior to dependent type name 'T::type' is a C++20 extension}}
+// pre-cxx20-compat-warning at +2 {{'consteval' specifier is incompatible with C++ standards before C++20}}
+// pre-cxx20-compat-warning at +1 {{missing 'typename' prior to dependent type name 'T::type' is incompatible with C++ standards before C++20}}
+template<typename T> consteval T::type f();
+
+// cxx17-error at +2 {{unknown type name 'constinit'}}
+// pre-cxx20-compat-warning at +1 {{'constinit' specifier is incompatible with C++ standards before C++20}}
+constinit int x = 4;
diff --git a/clang/test/SemaCXX/rounding-math-crash.cpp b/clang/test/SemaCXX/rounding-math-crash.cpp
index 2a09b02fe9cef..f9c5ada2a403e 100644
--- a/clang/test/SemaCXX/rounding-math-crash.cpp
+++ b/clang/test/SemaCXX/rounding-math-crash.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-linux -fsyntax-only -frounding-math -verify %s
template <class b> b::a() {}
-// expected-warning at -1 {{implicit 'typename' is a C++20 extension}}
+// expected-warning at -1 {{missing 'typename' prior to dependent type name 'b::a' is a C++20 extension}}
// expected-error at -2 {{expected unqualified-id}}
diff --git a/clang/test/SemaCXX/unknown-type-name.cpp b/clang/test/SemaCXX/unknown-type-name.cpp
index 602f8f9ec7d29..9ce8b69c5bd22 100644
--- a/clang/test/SemaCXX/unknown-type-name.cpp
+++ b/clang/test/SemaCXX/unknown-type-name.cpp
@@ -36,15 +36,15 @@ struct A {
static int n;
static type m;
- static int h(T::type, int); // expected-warning{{implicit 'typename' is a C++20 extension}}
- static int h(T::type x, char); // expected-warning{{implicit 'typename' is a C++20 extension}}
+ static int h(T::type, int); // expected-warning{{missing 'typename'}}
+ static int h(T::type x, char); // expected-warning{{missing 'typename'}}
};
template<typename T>
-A<T>::type g(T t) { return t; } // expected-warning{{implicit 'typename' is a C++20 extension}}
+A<T>::type g(T t) { return t; } // expected-warning{{missing 'typename'}}
template<typename T>
-A<T>::type A<T>::f() { return type(); } // expected-warning{{implicit 'typename' is a C++20 extension}}
+A<T>::type A<T>::f() { return type(); } // expected-warning{{missing 'typename'}}
template<typename T>
void f(T::type) { } // expected-error{{missing 'typename'}}
@@ -84,11 +84,11 @@ int *test(UnknownType *fool) { return 0; } // expected-error{{unknown type name
template<typename T> int A<T>::n(T::value); // ok
template<typename T>
-A<T>::type // expected-warning {{implicit 'typename' is a C++20 extension}}
+A<T>::type // expected-warning {{missing 'typename'}}
A<T>::m(T::value, 0); // ok
-template<typename T> int A<T>::h(T::type, int) {} // expected-warning{{implicit 'typename' is a C++20 extension}}
-template<typename T> int A<T>::h(T::type x, char) {} // expected-warning{{implicit 'typename' is a C++20 extension}}
+template<typename T> int A<T>::h(T::type, int) {} // expected-warning{{missing 'typename'}}
+template<typename T> int A<T>::h(T::type x, char) {} // expected-warning{{missing 'typename'}}
template<typename T> int h(T::type, int); // expected-error{{missing 'typename'}}
template<typename T> int h(T::type x, char); // expected-error{{missing 'typename'}}
@@ -117,4 +117,4 @@ template<typename T> int i(T::type, int());
// a fix-it to add 'typename A<T>::type'
template<typename T>
A<T>::g() { } // expected-error{{expected unqualified-id}}
-// expected-warning at -1{{implicit 'typename' is a C++20 extension}}
+// expected-warning at -1{{missing 'typename'}}
diff --git a/clang/test/SemaTemplate/typename-specifier-3.cpp b/clang/test/SemaTemplate/typename-specifier-3.cpp
index cdd065c98bb0a..6e09012a86e6a 100644
--- a/clang/test/SemaTemplate/typename-specifier-3.cpp
+++ b/clang/test/SemaTemplate/typename-specifier-3.cpp
@@ -28,7 +28,7 @@ namespace PR12884_original {
typedef int arg;
};
struct C {
- typedef B::X<typename B::arg> x; // precxx17-warning{{missing 'typename' prior to dependent type name 'B::X'; implicit 'typename' is a C++20 extension}}
+ typedef B::X<typename B::arg> x; // precxx17-warning{{missing 'typename' prior to dependent type name 'B::X' is a C++20 extension}}
};
};
>From 934cfa796e83e2fda4de14f8dfe68586270dd49c Mon Sep 17 00:00:00 2001
From: Chengjun <chengjunp at Nvidia.com>
Date: Wed, 7 May 2025 16:59:33 -0700
Subject: [PATCH 070/115] [NVPTX] Fix NVPTXAA_before_BasicAA Test (#138992)
Fix the failing test in the
[PR](https://github.com/llvm/llvm-project/pull/125965) by moving the
test to CodeGen/NVPTX.
---
.../{Analysis/NVPTXAA => CodeGen/NVPTX}/NVPTXAA_before_BasicAA.ll | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/{Analysis/NVPTXAA => CodeGen/NVPTX}/NVPTXAA_before_BasicAA.ll (100%)
diff --git a/llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll b/llvm/test/CodeGen/NVPTX/NVPTXAA_before_BasicAA.ll
similarity index 100%
rename from llvm/test/Analysis/NVPTXAA/NVPTXAA_before_BasicAA.ll
rename to llvm/test/CodeGen/NVPTX/NVPTXAA_before_BasicAA.ll
>From 32f514c68d78530f7d7adee883e1b4fc8dd1e9fd Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 03:22:42 +0000
Subject: [PATCH 071/115] [JITLink][i386] Support R_386_GOT32X using existing
non-relaxable edge kind.
R_386_GOT32 was already handled by lowering to
EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT. R_386_GOT32X is just an
optionally relaxable version of R_386_GOT32, so we can lower to the same edge
kind.
I've left a TODO to add a relaxable edge kind and update the i386 relaxation
optimization in the future, though I'll probably leave this as an exercise for
any i386 aficionados out there. ;)
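For reference, a rough sketch of the value that ends up at the fixup site once
the GOT pass has created an entry for the target (the names below are
illustrative, not the JITLink API):

  #include <cstdint>

  // Delta32FromGOT-style fixup: the 32-bit value written at the relocation
  // site is the offset of the target's GOT entry from the GOT base symbol.
  // GOTEntryAddr and GOTBaseAddr are assumed inputs; in JITLink they come
  // from the GOT table manager and _GLOBAL_OFFSET_TABLE_ respectively.
  uint32_t gotDelta32(uint64_t GOTEntryAddr, uint64_t GOTBaseAddr,
                      int64_t Addend) {
    return static_cast<uint32_t>(GOTEntryAddr - GOTBaseAddr + Addend);
  }

This is what the jitlink-check lines in the new test below verify: the decoded
operand is compared against got_addr(...) - _GLOBAL_OFFSET_TABLE_.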
---
llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp | 3 +++
.../i386/ELF_i386_small_pic_relocations_got.s | 21 +++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
index 1273dc1481c35..6e9f6ed40ec8c 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -129,6 +129,9 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
return EdgeKind_i386::PCRel16;
case ELF::R_386_GOT32:
return EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT;
+ case ELF::R_386_GOT32X:
+ // TODO: Add a relaxable edge kind and update relaxation optimization.
+ return EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT;
case ELF::R_386_GOTPC:
return EdgeKind_i386::Delta32;
case ELF::R_386_GOTOFF:
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s
index 080341ac3bfed..d4cf7090b7dc0 100644
--- a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s
+++ b/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s
@@ -33,7 +33,28 @@ test_got:
leal named_data2 at GOT+5, %eax
.size test_got, .-test_got
+# Test R_386_GOT32X handling.
+#
+# We want to check both the offset to the GOT entry and its contents.
+# jitlink-check: decode_operand(test_gotx_load, 4) = got_addr(elf_sm_pic_reloc_got.o, named_data1) - _GLOBAL_OFFSET_TABLE_
+# jitlink-check: *{4}(got_addr(elf_sm_pic_reloc_got.o, named_data1)) = named_data1
+ .globl test_gotx
+ .p2align 4, 0x90
+ .type test_gotx, at function
+test_gotx:
+ calll .L0$pb
+.L0$pb:
+ popl %eax
+.Ltmp0:
+ addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
+ .globl test_gotx_load
+test_gotx_load:
+ movl named_data1 at GOT(%eax), %eax
+ .size test_gotx_load, .-test_gotx_load
+ movl (%eax), %eax
+ retl
+ .size test_gotx, .-test_gotx
# Test GOTOFF64 handling.
# jitlink-check: decode_operand(test_gotoff, 1) = named_func - _GLOBAL_OFFSET_TABLE_ + 99
>From c1f0e68cec4218c9d51a4ad0a6f6d878ed573dfe Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 03:39:06 +0000
Subject: [PATCH 072/115] [JITLink][i386] Get rid of EdgeKind_i386::None.
R_386_NONE ELF edges should be handled by skipping the relocation, rather than
adding no-op edges to the LinkGraph.
---
llvm/include/llvm/ExecutionEngine/JITLink/i386.h | 9 +--------
llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp | 12 +++++++-----
llvm/lib/ExecutionEngine/JITLink/i386.cpp | 2 --
3 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
index efe8182934dd7..629e0d8a18729 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
@@ -20,9 +20,6 @@ namespace llvm::jitlink::i386 {
/// Represets i386 fixups
enum EdgeKind_i386 : Edge::Kind {
- /// None
- None = Edge::FirstRelocation,
-
/// A plain 32-bit pointer value relocation.
///
/// Fixup expression:
@@ -32,7 +29,7 @@ enum EdgeKind_i386 : Edge::Kind {
/// - The target must reside in the low 32-bits of the address space,
/// otherwise an out-of-range error will be returned.
///
- Pointer32,
+ Pointer32 = Edge::FirstRelocation,
/// A 32-bit PC-relative relocation.
///
@@ -192,10 +189,6 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
auto FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
- case i386::None: {
- break;
- }
-
case i386::Pointer32: {
uint32_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
*(ulittle32_t *)FixupPtr = Value;
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
index 6e9f6ed40ec8c..b14b87232e24d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -117,8 +117,6 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
Expected<i386::EdgeKind_i386> getRelocationKind(const uint32_t Type) {
using namespace i386;
switch (Type) {
- case ELF::R_386_NONE:
- return EdgeKind_i386::None;
case ELF::R_386_32:
return EdgeKind_i386::Pointer32;
case ELF::R_386_PC32:
@@ -170,6 +168,12 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
Block &BlockToFix) {
using Base = ELFLinkGraphBuilder<ELFT>;
+ auto ELFReloc = Rel.getType(false);
+
+ // R_386_NONE is a no-op.
+ if (LLVM_UNLIKELY(ELFReloc == ELF::R_386_NONE))
+ return Error::success();
+
uint32_t SymbolIndex = Rel.getSymbol(false);
auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
if (!ObjSymbol)
@@ -184,7 +188,7 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
Base::GraphSymbols.size()),
inconvertibleErrorCode());
- Expected<i386::EdgeKind_i386> Kind = getRelocationKind(Rel.getType(false));
+ Expected<i386::EdgeKind_i386> Kind = getRelocationKind(ELFReloc);
if (!Kind)
return Kind.takeError();
@@ -192,8 +196,6 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
int64_t Addend = 0;
switch (*Kind) {
- case i386::EdgeKind_i386::None:
- break;
case i386::EdgeKind_i386::Pointer32:
case i386::EdgeKind_i386::PCRel32:
case i386::EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT:
diff --git a/llvm/lib/ExecutionEngine/JITLink/i386.cpp b/llvm/lib/ExecutionEngine/JITLink/i386.cpp
index e984bb10983d0..f714716fb353d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/i386.cpp
@@ -18,8 +18,6 @@ namespace llvm::jitlink::i386 {
const char *getEdgeKindName(Edge::Kind K) {
switch (K) {
- case None:
- return "None";
case Pointer32:
return "Pointer32";
case PCRel32:
>From 764614e6355e214c6b64c715d105007b1a4b97fd Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 7 May 2025 17:34:44 -0700
Subject: [PATCH 073/115] [MemProf] Restructure the pruning of unneeded NotCold
contexts (#138792)
This change is mostly NFC, other than the addition of a new message
printed when contexts are pruned while -memprof-report-hinted-sizes is
enabled.
To prepare for a follow-on change, adjust the way we determine which
NotCold contexts can be pruned (because they overlap with longer NotCold
contexts), and change the way we perform this pruning.
Instead of determining the points at which we need to keep NotCold
contexts during the building of the trie, we now determine this on the
fly as the MIB metadata nodes are recursively built. This simplifies a
follow-on change that performs additional pruning of some NotCold
contexts, which can affect which others need to be kept as the
longest overlapping NotCold contexts.
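As a rough illustration of the new pruning rule, here is a sketch using
simplified stand-in types (not the MDNode-based code in
SaveFilteredNewMIBNodes below):

  #include <vector>

  struct MIBCtx {
    bool IsCold;        // allocation type of the context
    unsigned StackLen;  // call-stack length of the context
  };

  // Keep all cold contexts. Among not-cold contexts, keep any already kept
  // for a deeper caller (StackLen > CallerLen); otherwise keep only the
  // first not-cold context created for the immediate caller and prune the
  // rest.
  std::vector<MIBCtx> filterNewMIBs(const std::vector<MIBCtx> &New,
                                    unsigned CallerLen) {
    bool LongerNotColdKept = false;
    for (const MIBCtx &C : New)
      if (!C.IsCold && C.StackLen > CallerLen)
        LongerNotColdKept = true;

    std::vector<MIBCtx> Saved;
    bool KeepFirstNewNotCold = !LongerNotColdKept;
    for (const MIBCtx &C : New) {
      if (C.IsCold || C.StackLen > CallerLen) {
        Saved.push_back(C);
      } else if (KeepFirstNewNotCold) {
        KeepFirstNewNotCold = false;
        Saved.push_back(C);
      }
      // Any other not-cold context at this level is pruned (and reported
      // when -memprof-report-hinted-sizes is enabled).
    }
    return Saved;
  }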
---
.../include/llvm/Analysis/MemoryProfileInfo.h | 34 +---
llvm/lib/Analysis/MemoryProfileInfo.cpp | 151 +++++++++++++-----
llvm/test/Transforms/PGOProfile/memprof.ll | 17 +-
3 files changed, 123 insertions(+), 79 deletions(-)
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index f75783a4fef50..deb7ab134c161 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -56,37 +56,6 @@ class CallStackTrie {
// Allocation types for call context sharing the context prefix at this
// node.
uint8_t AllocTypes;
- // Updated as we add allocations to note if this is the deepest point in the
- // trie that has an ambiguous allocation type (both Cold and NotCold). It is
- // used to prune unneeded NotCold contexts, taking advantage of the fact
- // that we later will only clone Cold contexts, as NotCold is the allocation
- // default. We only need to keep as metadata the NotCold contexts that
- // overlap the longest with Cold allocations, so that we know how deeply we
- // need to clone. For example, assume we add the following contexts to the
- // trie:
- // 1 3 (notcold)
- // 1 2 4 (cold)
- // 1 2 5 (notcold)
- // 1 2 6 (notcold)
- // the trie looks like:
- // 1
- // / \
- // 2 3
- // /|\
- // 4 5 6
- //
- // It is sufficient to prune all but one not cold contexts (either 1,2,5 or
- // 1,2,6, we arbitrarily keep the first one we encounter which will be
- // 1,2,5). We'll initially have DeepestAmbiguousAllocType set false for trie
- // node 1 after the trie is built, and true for node 2. This indicates that
- // the not cold context ending in 3 is not needed (its immediate callee has
- // this value set false). The first notcold context we encounter when
- // iterating the callers of node 2 will be the context ending in 5 (since
- // std::map iteration is in sorted order of key), which will see that this
- // field is true for its callee, so we will keep it. But at that point we
- // set the callee's flag to false which prevents adding the not cold context
- // ending in 6 unnecessarily.
- bool DeepestAmbiguousAllocType = true;
// If the user has requested reporting of hinted sizes, keep track of the
// associated full stack id and profiled sizes. Can have more than one
// after trimming (e.g. when building from metadata). This is only placed on
@@ -134,8 +103,7 @@ class CallStackTrie {
bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
std::vector<uint64_t> &MIBCallStack,
std::vector<Metadata *> &MIBNodes,
- bool CalleeHasAmbiguousCallerContext,
- bool &CalleeDeepestAmbiguousAllocType);
+ bool CalleeHasAmbiguousCallerContext);
public:
CallStackTrie() = default;
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 6ca5b5e492723..30b674c320ef6 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -163,16 +163,10 @@ void CallStackTrie::addCallStack(
continue;
}
// Update existing caller node if it exists.
- CallStackTrieNode *Prev = nullptr;
auto [Next, Inserted] = Curr->Callers.try_emplace(StackId);
if (!Inserted) {
- Prev = Curr;
Curr = Next->second;
Curr->addAllocType(AllocType);
- // If this node has an ambiguous alloc type, its callee is not the deepest
- // point where we have an ambigous allocation type.
- if (!hasSingleAllocType(Curr->AllocTypes))
- Prev->DeepestAmbiguousAllocType = false;
continue;
}
// Otherwise add a new caller node.
@@ -248,41 +242,114 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
convertHotToNotCold(Caller.second);
}
+// Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
+// on options that enable filtering out some NotCold contexts.
+static void SaveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
+ std::vector<Metadata *> &SavedMIBNodes,
+ unsigned CallerContextLength) {
+ // In the simplest case, with pruning disabled, keep all the new MIB nodes.
+ if (MemProfKeepAllNotColdContexts)
+ append_range(SavedMIBNodes, NewMIBNodes);
+
+ auto EmitMessageForRemovedContexts = [](const MDNode *MIBMD, StringRef Tag,
+ StringRef Extra) {
+ assert(MIBMD->getNumOperands() > 2);
+ for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
+ MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
+ assert(ContextSizePair->getNumOperands() == 2);
+ uint64_t FullStackId =
+ mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
+ ->getZExtValue();
+ uint64_t TS =
+ mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
+ ->getZExtValue();
+ errs() << "MemProf hinting: Total size for " << Tag
+ << " non-cold full allocation context hash " << FullStackId
+ << Extra << ": " << TS << "\n";
+ }
+ };
+
+ // Prune unneeded NotCold contexts, taking advantage of the fact
+ // that we later will only clone Cold contexts, as NotCold is the allocation
+ // default. We only need to keep as metadata the NotCold contexts that
+ // overlap the longest with Cold allocations, so that we know how deeply we
+ // need to clone. For example, assume we add the following contexts to the
+ // trie:
+ // 1 3 (notcold)
+ // 1 2 4 (cold)
+ // 1 2 5 (notcold)
+ // 1 2 6 (notcold)
+ // the trie looks like:
+ // 1
+ // / \
+ // 2 3
+ // /|\
+ // 4 5 6
+ //
+ // It is sufficient to prune all but one not-cold contexts (either 1,2,5 or
+ // 1,2,6, we arbitrarily keep the first one we encounter which will be
+ // 1,2,5).
+ //
+ // To do this pruning, we first check if there were any not-cold
+ // contexts kept for a deeper caller, which will have a context length larger
+ // than the CallerContextLength being handled here (i.e. kept by a deeper
+ // recursion step). If so, none of the not-cold MIB nodes added for the
+ // immediate callers need to be kept. If not, we keep the first (created
+ // for the immediate caller) not-cold MIB node.
+ bool LongerNotColdContextKept = false;
+ for (auto *MIB : NewMIBNodes) {
+ auto MIBMD = cast<MDNode>(MIB);
+ if (getMIBAllocType(MIBMD) == AllocationType::Cold)
+ continue;
+ MDNode *StackMD = getMIBStackNode(MIBMD);
+ assert(StackMD);
+ if (StackMD->getNumOperands() > CallerContextLength) {
+ LongerNotColdContextKept = true;
+ break;
+ }
+ }
+ // Don't need to emit any for the immediate caller if we already have
+ // longer overlapping contexts;
+ bool KeepFirstNewNotCold = !LongerNotColdContextKept;
+ auto NewColdMIBNodes = make_filter_range(NewMIBNodes, [&](const Metadata *M) {
+ auto MIBMD = cast<MDNode>(M);
+ // Only keep cold contexts and first (longest non-cold context).
+ if (getMIBAllocType(MIBMD) != AllocationType::Cold) {
+ MDNode *StackMD = getMIBStackNode(MIBMD);
+ assert(StackMD);
+ // Keep any already kept for longer contexts.
+ if (StackMD->getNumOperands() > CallerContextLength)
+ return true;
+ // Otherwise keep the first one added by the immediate caller if there
+ // were no longer contexts.
+ if (KeepFirstNewNotCold) {
+ KeepFirstNewNotCold = false;
+ return true;
+ }
+ if (MemProfReportHintedSizes)
+ EmitMessageForRemovedContexts(MIBMD, "pruned", "");
+ return false;
+ }
+ return true;
+ });
+ for (auto *M : NewColdMIBNodes)
+ SavedMIBNodes.push_back(M);
+}
+
// Recursive helper to trim contexts and create metadata nodes.
// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
// caller makes it simpler to handle the many early returns in this method.
bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
std::vector<uint64_t> &MIBCallStack,
std::vector<Metadata *> &MIBNodes,
- bool CalleeHasAmbiguousCallerContext,
- bool &CalleeDeepestAmbiguousAllocType) {
+ bool CalleeHasAmbiguousCallerContext) {
// Trim context below the first node in a prefix with a single alloc type.
// Add an MIB record for the current call stack prefix.
if (hasSingleAllocType(Node->AllocTypes)) {
- // Because we only clone cold contexts (we don't clone for exposing NotCold
- // contexts as that is the default allocation behavior), we create MIB
- // metadata for this context if any of the following are true:
- // 1) It is cold.
- // 2) The immediate callee is the deepest point where we have an ambiguous
- // allocation type (i.e. the other callers that are cold need to know
- // that we have a not cold context overlapping to this point so that we
- // know how deep to clone).
- // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are
- // reporting hinted sizes, and want to get information from the indexing
- // step for all contexts, or have specified a value less than 100% for
- // -memprof-cloning-cold-threshold.
- if (Node->hasAllocType(AllocationType::Cold) ||
- CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) {
- std::vector<ContextTotalSize> ContextSizeInfo;
- collectContextSizeInfo(Node, ContextSizeInfo);
- MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack,
- (AllocationType)Node->AllocTypes,
- ContextSizeInfo));
- // If we just emitted an MIB for a not cold caller, don't need to emit
- // another one for the callee to correctly disambiguate its cold callers.
- if (!Node->hasAllocType(AllocationType::Cold))
- CalleeDeepestAmbiguousAllocType = false;
- }
+ std::vector<ContextTotalSize> ContextSizeInfo;
+ collectContextSizeInfo(Node, ContextSizeInfo);
+ MIBNodes.push_back(createMIBNode(
+ Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo));
return true;
}
@@ -291,14 +358,21 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
if (!Node->Callers.empty()) {
bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
bool AddedMIBNodesForAllCallerContexts = true;
+ // Accumulate all new MIB nodes by the recursive calls below into a vector
+ // that will later be filtered before adding to the caller's MIBNodes
+ // vector.
+ std::vector<Metadata *> NewMIBNodes;
for (auto &Caller : Node->Callers) {
MIBCallStack.push_back(Caller.first);
- AddedMIBNodesForAllCallerContexts &= buildMIBNodes(
- Caller.second, Ctx, MIBCallStack, MIBNodes,
- NodeHasAmbiguousCallerContext, Node->DeepestAmbiguousAllocType);
+ AddedMIBNodesForAllCallerContexts &=
+ buildMIBNodes(Caller.second, Ctx, MIBCallStack, NewMIBNodes,
+ NodeHasAmbiguousCallerContext);
// Remove Caller.
MIBCallStack.pop_back();
}
+ // Pass in the stack length of the MIB nodes added for the immediate caller,
+ // which is the current stack length plus 1.
+ SaveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1);
if (AddedMIBNodesForAllCallerContexts)
return true;
// We expect that the callers should be forced to add MIBs to disambiguate
@@ -372,13 +446,8 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
// The CalleeHasAmbiguousCallerContext flag is meant to say whether the
// callee of the given node has more than one caller. Here the node being
// passed in is the alloc and it has no callees. So it's false.
- // Similarly, the last parameter is meant to say whether the callee of the
- // given node is the deepest point where we have ambiguous alloc types, which
- // is also false as the alloc has no callees.
- bool DeepestAmbiguousAllocType = true;
if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
- /*CalleeHasAmbiguousCallerContext=*/false,
- DeepestAmbiguousAllocType)) {
+ /*CalleeHasAmbiguousCallerContext=*/false)) {
assert(MIBCallStack.size() == 1 &&
"Should only be left with Alloc's location in stack");
CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 73226df861ea5..4a3ddcc38b263 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -63,15 +63,20 @@
;; give both memprof and pgo metadata.
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
-;; Check that the total sizes are reported if requested.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-keep-all-not-cold-contexts 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES
+;; Check that the total sizes are reported if requested. A message should be
+;; emitted for the pruned context.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZENOKEEPALL
+
+;; Check that the total sizes are reported if requested, and prevent pruning
+;; via -memprof-keep-all-not-cold-contexts.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-keep-all-not-cold-contexts 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZESKEEPALL
;; Check that we hint additional allocations with a threshold < 100%
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-matching-cold-threshold=60 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZESTHRESH60
;; Make sure that the -memprof-cloning-cold-threshold flag is enough to cause
;; the size metadata to be generated for the LTO link.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-cloning-cold-threshold=80 -memprof-keep-all-not-cold-contexts 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-cloning-cold-threshold=80 -memprof-keep-all-not-cold-contexts 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES,TOTALSIZESKEEPALL
;; Make sure we emit a random hotness seed if requested.
; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND
@@ -370,6 +375,8 @@ for.end: ; preds = %for.cond
; MEMPROF: ![[C10]] = !{i64 2061451396820446691}
; MEMPROF: ![[C11]] = !{i64 1544787832369987002}
+; TOTALSIZENOKEEPALL: Total size for pruned non-cold full allocation context hash 1093248920606587996: 10
+
;; For non-context sensitive allocations that get attributes we emit a message
;; with the full allocation context hash, type, and size in bytes.
; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10
@@ -393,8 +400,8 @@ for.end: ; preds = %for.cond
; TOTALSIZES: !"cold", ![[CONTEXT4:[0-9]+]], ![[CONTEXT5:[0-9]+]]}
; TOTALSIZES: ![[CONTEXT4]] = !{i64 -2103941543456458045, i64 10}
; TOTALSIZES: ![[CONTEXT5]] = !{i64 -191931298737547222, i64 10}
-; TOTALSIZES: !"notcold", ![[CONTEXT6:[0-9]+]]}
-; TOTALSIZES: ![[CONTEXT6]] = !{i64 1093248920606587996, i64 10}
+; TOTALSIZESKEEPALL: !"notcold", ![[CONTEXT6:[0-9]+]]}
+; TOTALSIZESKEEPALL: ![[CONTEXT6]] = !{i64 1093248920606587996, i64 10}
; MEMPROFNOCOLINFO: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" }
; MEMPROFNOCOLINFO: #[[A2]] = { builtin allocsize(0) "memprof"="cold" }
>From 7245e21e896a39fbbbbe38e800791411ebde4011 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Wed, 7 May 2025 17:37:38 -0700
Subject: [PATCH 074/115] [NFC][Support] Add llvm::uninitialized_copy (#138174)
Add `llvm::uninitialized_copy`, which accepts a range instead of a
start/end iterator pair for the source of the copy.
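A minimal usage sketch (the function and variable names below are
illustrative; only llvm::uninitialized_copy itself comes from this patch):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"

  // Before: std::uninitialized_copy(Args.begin(), Args.end(), TrailingStorage);
  // After:  the range-based overload added here.
  void copyIntoTrailingStorage(llvm::ArrayRef<int> Args, int *TrailingStorage) {
    // TrailingStorage must point to uninitialized memory with room for
    // Args.size() elements, as with std::uninitialized_copy.
    llvm::uninitialized_copy(Args, TrailingStorage);
  }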
---
clang/include/clang/AST/DeclCXX.h | 6 +-
clang/include/clang/AST/DeclOpenACC.h | 9 +--
clang/include/clang/AST/ExprCXX.h | 6 +-
clang/include/clang/AST/OpenACCClause.h | 72 +++++++------------
clang/include/clang/AST/StmtOpenACC.h | 54 +++++++-------
clang/include/clang/Sema/ParsedTemplate.h | 4 +-
clang/lib/AST/Decl.cpp | 6 +-
clang/lib/AST/DeclObjC.cpp | 4 +-
clang/lib/AST/DeclTemplate.cpp | 11 ++-
clang/lib/AST/Expr.cpp | 8 +--
clang/lib/AST/ExprCXX.cpp | 16 ++---
clang/lib/AST/OpenACCClause.cpp | 13 ++--
clang/lib/AST/StmtOpenACC.cpp | 4 +-
clang/lib/AST/Type.cpp | 8 +--
clang/tools/libclang/CXIndexDataConsumer.cpp | 3 +-
lldb/source/Utility/Checksum.cpp | 5 +-
llvm/include/llvm/ADT/ArrayRef.h | 2 +-
llvm/include/llvm/ADT/STLExtras.h | 5 ++
llvm/lib/Analysis/ScalarEvolution.cpp | 10 +--
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +-
llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 4 +-
llvm/lib/DebugInfo/MSF/MSFBuilder.cpp | 6 +-
llvm/lib/IR/AttributeImpl.h | 2 +-
llvm/lib/ObjectYAML/MinidumpEmitter.cpp | 2 +-
llvm/lib/Support/FoldingSet.cpp | 4 +-
llvm/lib/TableGen/Record.cpp | 23 +++---
.../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp | 2 +-
llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 6 +-
.../unittests/Support/TrailingObjectsTest.cpp | 19 ++---
mlir/include/mlir/IR/BuiltinAttributes.td | 2 +-
mlir/include/mlir/Support/StorageUniquer.h | 4 +-
.../Dialect/Affine/Analysis/NestedMatcher.cpp | 4 +-
mlir/lib/IR/AffineMapDetail.h | 3 +-
mlir/lib/IR/Location.cpp | 8 +--
mlir/lib/IR/MLIRContext.cpp | 2 +-
mlir/lib/IR/TypeDetail.h | 3 +-
mlir/lib/Tools/PDLL/AST/Nodes.cpp | 42 ++++-------
37 files changed, 164 insertions(+), 221 deletions(-)
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index fc84e04dcc398..b7980137002aa 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -3864,8 +3864,7 @@ class UsingPackDecl final
InstantiatedFrom ? InstantiatedFrom->getDeclName()
: DeclarationName()),
InstantiatedFrom(InstantiatedFrom), NumExpansions(UsingDecls.size()) {
- std::uninitialized_copy(UsingDecls.begin(), UsingDecls.end(),
- getTrailingObjects<NamedDecl *>());
+ llvm::uninitialized_copy(UsingDecls, getTrailingObjects<NamedDecl *>());
}
void anchor() override;
@@ -4236,8 +4235,7 @@ class DecompositionDecl final
: VarDecl(Decomposition, C, DC, StartLoc, LSquareLoc, nullptr, T, TInfo,
SC),
NumBindings(Bindings.size()) {
- std::uninitialized_copy(Bindings.begin(), Bindings.end(),
- getTrailingObjects<BindingDecl *>());
+ llvm::uninitialized_copy(Bindings, getTrailingObjects<BindingDecl *>());
for (auto *B : Bindings) {
B->setDecomposedDecl(this);
if (B->isParameterPack() && B->getBinding()) {
diff --git a/clang/include/clang/AST/DeclOpenACC.h b/clang/include/clang/AST/DeclOpenACC.h
index 8c612fbf1ec07..905d9bf636ea1 100644
--- a/clang/include/clang/AST/DeclOpenACC.h
+++ b/clang/include/clang/AST/DeclOpenACC.h
@@ -18,6 +18,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/OpenACCClause.h"
#include "clang/Basic/OpenACCKinds.h"
+#include "llvm/ADT/STLExtras.h"
namespace clang {
@@ -85,8 +86,8 @@ class OpenACCDeclareDecl final
: OpenACCConstructDecl(OpenACCDeclare, DC, OpenACCDirectiveKind::Declare,
StartLoc, DirLoc, EndLoc) {
// Initialize the trailing storage.
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
@@ -136,8 +137,8 @@ class OpenACCRoutineDecl final
assert(LParenLoc.isValid() &&
"Cannot represent implicit name with this declaration");
// Initialize the trailing storage.
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h
index 04d08b022c562..8710f252a0c5c 100644
--- a/clang/include/clang/AST/ExprCXX.h
+++ b/clang/include/clang/AST/ExprCXX.h
@@ -41,6 +41,7 @@
#include "clang/Basic/TypeTraits.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
@@ -4417,7 +4418,7 @@ class SizeOfPackExpr final
assert((!Length || PartialArgs.empty()) &&
"have partial args for non-dependent sizeof... expression");
auto *Args = getTrailingObjects<TemplateArgument>();
- std::uninitialized_copy(PartialArgs.begin(), PartialArgs.end(), Args);
+ llvm::uninitialized_copy(PartialArgs, Args);
setDependence(Length ? ExprDependence::None
: ExprDependence::ValueInstantiation);
}
@@ -4522,8 +4523,7 @@ class PackIndexingExpr final
FullySubstituted(FullySubstituted) {
auto *Exprs = getTrailingObjects<Expr *>();
- std::uninitialized_copy(SubstitutedExprs.begin(), SubstitutedExprs.end(),
- Exprs);
+ llvm::uninitialized_copy(SubstitutedExprs, Exprs);
setDependence(computeDependence(this));
if (!isInstantiationDependent())
diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h
index 449bcb71f9f32..65377b91f83d3 100644
--- a/clang/include/clang/AST/OpenACCClause.h
+++ b/clang/include/clang/AST/OpenACCClause.h
@@ -13,9 +13,11 @@
#ifndef LLVM_CLANG_AST_OPENACCCLAUSE_H
#define LLVM_CLANG_AST_OPENACCCLAUSE_H
+
#include "clang/AST/ASTContext.h"
#include "clang/AST/StmtIterator.h"
#include "clang/Basic/OpenACCKinds.h"
+#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <variant>
@@ -291,8 +293,7 @@ class OpenACCDeviceTypeClause final
"Only a single asterisk version is permitted, and must be the "
"only one");
- std::uninitialized_copy(Archs.begin(), Archs.end(),
- getTrailingObjects<DeviceTypeArgument>());
+ llvm::uninitialized_copy(Archs, getTrailingObjects<DeviceTypeArgument>());
}
public:
@@ -537,10 +538,9 @@ class OpenACCWaitClause final
QueuesLoc(QueuesLoc) {
// The first element of the trailing storage is always the devnum expr,
// whether it is used or not.
- std::uninitialized_copy(&DevNumExpr, &DevNumExpr + 1,
- getTrailingObjects<Expr *>());
- std::uninitialized_copy(QueueIdExprs.begin(), QueueIdExprs.end(),
- getTrailingObjects<Expr *>() + 1);
+ auto *Exprs = getTrailingObjects<Expr *>();
+ llvm::uninitialized_copy(ArrayRef(DevNumExpr), Exprs);
+ llvm::uninitialized_copy(QueueIdExprs, Exprs + 1);
setExprs(
MutableArrayRef(getTrailingObjects<Expr *>(), QueueIdExprs.size() + 1));
}
@@ -579,8 +579,7 @@ class OpenACCNumGangsClause final
ArrayRef<Expr *> IntExprs, SourceLocation EndLoc)
: OpenACCClauseWithExprs(OpenACCClauseKind::NumGangs, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(IntExprs.begin(), IntExprs.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(IntExprs, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), IntExprs.size()));
}
@@ -609,8 +608,7 @@ class OpenACCTileClause final
ArrayRef<Expr *> SizeExprs, SourceLocation EndLoc)
: OpenACCClauseWithExprs(OpenACCClauseKind::Tile, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(SizeExprs.begin(), SizeExprs.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(SizeExprs, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), SizeExprs.size()));
}
@@ -848,8 +846,7 @@ class OpenACCPrivateClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Private, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -871,8 +868,7 @@ class OpenACCFirstPrivateClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::FirstPrivate, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -894,8 +890,7 @@ class OpenACCDevicePtrClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::DevicePtr, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -917,8 +912,7 @@ class OpenACCAttachClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Attach, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -940,8 +934,7 @@ class OpenACCDetachClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Detach, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -963,8 +956,7 @@ class OpenACCDeleteClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Delete, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -986,8 +978,7 @@ class OpenACCUseDeviceClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::UseDevice, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1009,8 +1000,7 @@ class OpenACCNoCreateClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::NoCreate, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1032,8 +1022,7 @@ class OpenACCPresentClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Present, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1054,8 +1043,7 @@ class OpenACCHostClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Host, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1078,8 +1066,7 @@ class OpenACCDeviceClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Device, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1107,8 +1094,7 @@ class OpenACCCopyClause final
Spelling == OpenACCClauseKind::PCopy ||
Spelling == OpenACCClauseKind::PresentOrCopy) &&
"Invalid clause kind for copy-clause");
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1142,8 +1128,7 @@ class OpenACCCopyInClause final
Spelling == OpenACCClauseKind::PCopyIn ||
Spelling == OpenACCClauseKind::PresentOrCopyIn) &&
"Invalid clause kind for copyin-clause");
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1176,8 +1161,7 @@ class OpenACCCopyOutClause final
Spelling == OpenACCClauseKind::PCopyOut ||
Spelling == OpenACCClauseKind::PresentOrCopyOut) &&
"Invalid clause kind for copyout-clause");
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1210,8 +1194,7 @@ class OpenACCCreateClause final
Spelling == OpenACCClauseKind::PCreate ||
Spelling == OpenACCClauseKind::PresentOrCreate) &&
"Invalid clause kind for create-clause");
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1241,8 +1224,7 @@ class OpenACCReductionClause final
: OpenACCClauseWithVarList(OpenACCClauseKind::Reduction, BeginLoc,
LParenLoc, EndLoc),
Op(Operator) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1268,8 +1250,7 @@ class OpenACCLinkClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::Link, BeginLoc, LParenLoc,
EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
@@ -1293,8 +1274,7 @@ class OpenACCDeviceResidentClause final
ArrayRef<Expr *> VarList, SourceLocation EndLoc)
: OpenACCClauseWithVarList(OpenACCClauseKind::DeviceResident, BeginLoc,
LParenLoc, EndLoc) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), VarList.size()));
}
diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h
index 39c4c81844911..9aae91372e264 100644
--- a/clang/include/clang/AST/StmtOpenACC.h
+++ b/clang/include/clang/AST/StmtOpenACC.h
@@ -17,6 +17,7 @@
#include "clang/AST/Stmt.h"
#include "clang/Basic/OpenACCKinds.h"
#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/STLExtras.h"
#include <memory>
namespace clang {
@@ -159,8 +160,8 @@ class OpenACCComputeConstruct final
"represented by this type");
// Initialize the trailing storage.
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
@@ -272,8 +273,8 @@ class OpenACCCombinedConstruct final
"Only parallel loop, serial loop, and kernels loop constructs "
"should be represented by this type");
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -322,8 +323,8 @@ class OpenACCDataConstruct final
: OpenACCAssociatedStmtConstruct(OpenACCDataConstructClass,
OpenACCDirectiveKind::Data, Start,
DirectiveLoc, End, StructuredBlock) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -368,8 +369,8 @@ class OpenACCEnterDataConstruct final
: OpenACCConstructStmt(OpenACCEnterDataConstructClass,
OpenACCDirectiveKind::EnterData, Start,
DirectiveLoc, End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -406,8 +407,8 @@ class OpenACCExitDataConstruct final
: OpenACCConstructStmt(OpenACCExitDataConstructClass,
OpenACCDirectiveKind::ExitData, Start,
DirectiveLoc, End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -447,8 +448,8 @@ class OpenACCHostDataConstruct final
: OpenACCAssociatedStmtConstruct(OpenACCHostDataConstructClass,
OpenACCDirectiveKind::HostData, Start,
DirectiveLoc, End, StructuredBlock) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -525,11 +526,8 @@ class OpenACCWaitConstruct final
"NumExprs should always be >= 1 because the 'devnum' "
"expr is represented by a null if necessary");
- std::uninitialized_copy(&DevNumExpr, &DevNumExpr + 1,
- getExprPtr());
- std::uninitialized_copy(QueueIdExprs.begin(), QueueIdExprs.end(),
- getExprPtr() + 1);
-
+ llvm::uninitialized_copy(ArrayRef(DevNumExpr), getExprPtr());
+ llvm::uninitialized_copy(QueueIdExprs, getExprPtr() + 1);
std::uninitialized_copy(const_cast<OpenACCClause **>(Clauses.begin()),
const_cast<OpenACCClause **>(Clauses.end()),
getTrailingObjects<OpenACCClause *>());
@@ -624,7 +622,7 @@ class OpenACCCacheConstruct final
ParensLoc(LParenLoc, RParenLoc), ReadOnlyLoc(ReadOnlyLoc),
NumVars(VarList.size()) {
- std::uninitialized_copy(VarList.begin(), VarList.end(), getVarListPtr());
+ llvm::uninitialized_copy(VarList, getVarListPtr());
}
Expr **getVarListPtr() const {
@@ -690,8 +688,8 @@ class OpenACCInitConstruct final
: OpenACCConstructStmt(OpenACCInitConstructClass,
OpenACCDirectiveKind::Init, Start, DirectiveLoc,
End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -730,8 +728,8 @@ class OpenACCShutdownConstruct final
: OpenACCConstructStmt(OpenACCShutdownConstructClass,
OpenACCDirectiveKind::Shutdown, Start,
DirectiveLoc, End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -770,8 +768,8 @@ class OpenACCSetConstruct final
: OpenACCConstructStmt(OpenACCSetConstructClass,
OpenACCDirectiveKind::Set, Start, DirectiveLoc,
End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -810,8 +808,8 @@ class OpenACCUpdateConstruct final
: OpenACCConstructStmt(OpenACCUpdateConstructClass,
OpenACCDirectiveKind::Update, Start, DirectiveLoc,
End) {
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
}
@@ -859,8 +857,8 @@ class OpenACCAtomicConstruct final
DirectiveLoc, End, AssociatedStmt),
AtomicKind(AtKind) {
// Initialize the trailing storage.
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
diff --git a/clang/include/clang/Sema/ParsedTemplate.h b/clang/include/clang/Sema/ParsedTemplate.h
index ac4dbbf294caf..cff7f04666358 100644
--- a/clang/include/clang/Sema/ParsedTemplate.h
+++ b/clang/include/clang/Sema/ParsedTemplate.h
@@ -19,6 +19,7 @@
#include "clang/Basic/TemplateKinds.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Ownership.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <cstdlib>
@@ -249,8 +250,7 @@ namespace clang {
Kind(TemplateKind), LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc),
NumArgs(TemplateArgs.size()), ArgsInvalid(ArgsInvalid) {
- std::uninitialized_copy(TemplateArgs.begin(), TemplateArgs.end(),
- getTemplateArgs());
+ llvm::uninitialized_copy(TemplateArgs, getTemplateArgs());
}
~TemplateIdAnnotation() = default;
};
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 07b4d77bd2ab7..cbac75e9d109b 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3123,8 +3123,7 @@ FunctionDecl::DefaultedOrDeletedFunctionInfo::Create(
Info->NumLookups = Lookups.size();
Info->HasDeletedMessage = DeletedMessage != nullptr;
- std::uninitialized_copy(Lookups.begin(), Lookups.end(),
- Info->getTrailingObjects<DeclAccessPair>());
+ llvm::uninitialized_copy(Lookups, Info->getTrailingObjects<DeclAccessPair>());
if (DeletedMessage)
*Info->getTrailingObjects<StringLiteral *>() = DeletedMessage;
return Info;
@@ -5869,8 +5868,7 @@ ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc,
NextLocalImportAndComplete(nullptr, true) {
assert(getNumModuleIdentifiers(Imported) == IdentifierLocs.size());
auto *StoredLocs = getTrailingObjects<SourceLocation>();
- std::uninitialized_copy(IdentifierLocs.begin(), IdentifierLocs.end(),
- StoredLocs);
+ llvm::uninitialized_copy(IdentifierLocs, StoredLocs);
}
ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc,
diff --git a/clang/lib/AST/DeclObjC.cpp b/clang/lib/AST/DeclObjC.cpp
index 5c107325df30c..596262e217984 100644
--- a/clang/lib/AST/DeclObjC.cpp
+++ b/clang/lib/AST/DeclObjC.cpp
@@ -929,8 +929,8 @@ void ObjCMethodDecl::setParamsAndSelLocs(ASTContext &C,
unsigned Size = sizeof(ParmVarDecl *) * NumParams +
sizeof(SourceLocation) * SelLocs.size();
ParamsAndSelLocs = C.Allocate(Size);
- std::uninitialized_copy(Params.begin(), Params.end(), getParams());
- std::uninitialized_copy(SelLocs.begin(), SelLocs.end(), getStoredSelLocs());
+ llvm::uninitialized_copy(Params, getParams());
+ llvm::uninitialized_copy(SelLocs, getStoredSelLocs());
}
void ObjCMethodDecl::getSelectorLocs(
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 79a36109276f0..d058831b9f6bf 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -884,9 +884,8 @@ TemplateTemplateParmDecl::TemplateTemplateParmDecl(
: TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
TemplateParmPosition(D, P), Typename(Typename), ParameterPack(true),
ExpandedParameterPack(true), NumExpandedParams(Expansions.size()) {
- if (!Expansions.empty())
- std::uninitialized_copy(Expansions.begin(), Expansions.end(),
- getTrailingObjects<TemplateParameterList *>());
+ llvm::uninitialized_copy(Expansions,
+ getTrailingObjects<TemplateParameterList *>());
}
TemplateTemplateParmDecl *
@@ -944,8 +943,7 @@ void TemplateTemplateParmDecl::setDefaultArgument(
//===----------------------------------------------------------------------===//
TemplateArgumentList::TemplateArgumentList(ArrayRef<TemplateArgument> Args)
: NumArguments(Args.size()) {
- std::uninitialized_copy(Args.begin(), Args.end(),
- getTrailingObjects<TemplateArgument>());
+ llvm::uninitialized_copy(Args, getTrailingObjects<TemplateArgument>());
}
TemplateArgumentList *
@@ -1172,8 +1170,7 @@ ImplicitConceptSpecializationDecl::CreateDeserialized(
void ImplicitConceptSpecializationDecl::setTemplateArguments(
ArrayRef<TemplateArgument> Converted) {
assert(Converted.size() == NumTemplateArgs);
- std::uninitialized_copy(Converted.begin(), Converted.end(),
- getTrailingObjects<TemplateArgument>());
+ llvm::uninitialized_copy(Converted, getTrailingObjects<TemplateArgument>());
}
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 59c0e47c7c195..a4483a285ed4f 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -2111,8 +2111,8 @@ ImplicitCastExpr *ImplicitCastExpr::Create(const ASTContext &C, QualType T,
ImplicitCastExpr *E =
new (Buffer) ImplicitCastExpr(T, Kind, Operand, PathSize, FPO, VK);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
@@ -2138,8 +2138,8 @@ CStyleCastExpr *CStyleCastExpr::Create(const ASTContext &C, QualType T,
CStyleCastExpr *E =
new (Buffer) CStyleCastExpr(T, VK, K, Op, PathSize, FPO, WrittenTy, L, R);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp
index 169f11b611066..00bddce3a1ee2 100644
--- a/clang/lib/AST/ExprCXX.cpp
+++ b/clang/lib/AST/ExprCXX.cpp
@@ -772,8 +772,8 @@ CXXStaticCastExpr::Create(const ASTContext &C, QualType T, ExprValueKind VK,
auto *E = new (Buffer) CXXStaticCastExpr(T, VK, K, Op, PathSize, WrittenTy,
FPO, L, RParenLoc, AngleBrackets);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
@@ -800,8 +800,8 @@ CXXDynamicCastExpr *CXXDynamicCastExpr::Create(const ASTContext &C, QualType T,
new (Buffer) CXXDynamicCastExpr(T, VK, K, Op, PathSize, WrittenTy, L,
RParenLoc, AngleBrackets);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
@@ -863,8 +863,8 @@ CXXReinterpretCastExpr::Create(const ASTContext &C, QualType T,
new (Buffer) CXXReinterpretCastExpr(T, VK, K, Op, PathSize, WrittenTy, L,
RParenLoc, AngleBrackets);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
@@ -911,8 +911,8 @@ CXXFunctionalCastExpr *CXXFunctionalCastExpr::Create(
auto *E = new (Buffer)
CXXFunctionalCastExpr(T, VK, Written, K, Op, PathSize, FPO, L, R);
if (PathSize)
- std::uninitialized_copy_n(BasePath->data(), BasePath->size(),
- E->getTrailingObjects<CXXBaseSpecifier *>());
+ llvm::uninitialized_copy(*BasePath,
+ E->getTrailingObjects<CXXBaseSpecifier *>());
return E;
}
diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp
index 9d3645e6da1ca..526ea89a2cee3 100644
--- a/clang/lib/AST/OpenACCClause.cpp
+++ b/clang/lib/AST/OpenACCClause.cpp
@@ -114,8 +114,7 @@ OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc,
: OpenACCClauseWithParams(OpenACCClauseKind::Self, BeginLoc, LParenLoc,
EndLoc),
HasConditionExpr(std::nullopt), NumExprs(VarList.size()) {
- std::uninitialized_copy(VarList.begin(), VarList.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(VarList, getTrailingObjects<Expr *>());
}
OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc,
@@ -127,8 +126,8 @@ OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc,
assert((!ConditionExpr || ConditionExpr->isInstantiationDependent() ||
ConditionExpr->getType()->isScalarType()) &&
"Condition expression type not scalar/dependent");
- std::uninitialized_copy(&ConditionExpr, &ConditionExpr + 1,
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(ArrayRef(ConditionExpr),
+ getTrailingObjects<Expr *>());
}
OpenACCClause::child_range OpenACCClause::children() {
@@ -167,11 +166,9 @@ OpenACCGangClause::OpenACCGangClause(SourceLocation BeginLoc,
: OpenACCClauseWithExprs(OpenACCClauseKind::Gang, BeginLoc, LParenLoc,
EndLoc) {
assert(GangKinds.size() == IntExprs.size() && "Mismatch exprs/kind?");
- std::uninitialized_copy(IntExprs.begin(), IntExprs.end(),
- getTrailingObjects<Expr *>());
+ llvm::uninitialized_copy(IntExprs, getTrailingObjects<Expr *>());
setExprs(MutableArrayRef(getTrailingObjects<Expr *>(), IntExprs.size()));
- std::uninitialized_copy(GangKinds.begin(), GangKinds.end(),
- getTrailingObjects<OpenACCGangKind>());
+ llvm::uninitialized_copy(GangKinds, getTrailingObjects<OpenACCGangKind>());
}
OpenACCNumWorkersClause *
diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp
index c45eca92dc874..268e411cee9c6 100644
--- a/clang/lib/AST/StmtOpenACC.cpp
+++ b/clang/lib/AST/StmtOpenACC.cpp
@@ -61,8 +61,8 @@ OpenACCLoopConstruct::OpenACCLoopConstruct(
assert((Loop == nullptr || isa<ForStmt, CXXForRangeStmt>(Loop)) &&
"Associated Loop not a for loop?");
// Initialize the trailing storage.
- std::uninitialized_copy(Clauses.begin(), Clauses.end(),
- getTrailingObjects<const OpenACCClause *>());
+ llvm::uninitialized_copy(Clauses,
+ getTrailingObjects<const OpenACCClause *>());
setClauseList(MutableArrayRef(getTrailingObjects<const OpenACCClause *>(),
Clauses.size()));
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index 392a95d042353..31e4bcd7535ea 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -3784,13 +3784,13 @@ FunctionProtoType::FunctionProtoType(QualType result, ArrayRef<QualType> params,
ArrayRef<FunctionEffect> SrcFX = epi.FunctionEffects.effects();
auto *DestFX = getTrailingObjects<FunctionEffect>();
- std::uninitialized_copy(SrcFX.begin(), SrcFX.end(), DestFX);
+ llvm::uninitialized_copy(SrcFX, DestFX);
ArrayRef<EffectConditionExpr> SrcConds = epi.FunctionEffects.conditions();
if (!SrcConds.empty()) {
ExtraBits.EffectsHaveConditions = true;
auto *DestConds = getTrailingObjects<EffectConditionExpr>();
- std::uninitialized_copy(SrcConds.begin(), SrcConds.end(), DestConds);
+ llvm::uninitialized_copy(SrcConds, DestConds);
assert(std::any_of(SrcConds.begin(), SrcConds.end(),
[](const EffectConditionExpr &EC) {
if (const Expr *E = EC.getCondition())
@@ -4134,9 +4134,7 @@ PackIndexingType::PackIndexingType(const ASTContext &Context,
computeDependence(Pattern, IndexExpr, Expansions)),
Context(Context), Pattern(Pattern), IndexExpr(IndexExpr),
Size(Expansions.size()), FullySubstituted(FullySubstituted) {
-
- std::uninitialized_copy(Expansions.begin(), Expansions.end(),
- getTrailingObjects<QualType>());
+ llvm::uninitialized_copy(Expansions, getTrailingObjects<QualType>());
}
UnsignedOrNone PackIndexingType::getSelectedIndex() const {
diff --git a/clang/tools/libclang/CXIndexDataConsumer.cpp b/clang/tools/libclang/CXIndexDataConsumer.cpp
index ced94e13baf12..2b2e70d60d1d6 100644
--- a/clang/tools/libclang/CXIndexDataConsumer.cpp
+++ b/clang/tools/libclang/CXIndexDataConsumer.cpp
@@ -15,6 +15,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/Frontend/ASTUnit.h"
+#include "llvm/ADT/STLExtras.h"
using namespace clang;
using namespace clang::index;
@@ -409,7 +410,7 @@ const char *ScratchAlloc::toCStr(StringRef Str) {
const char *ScratchAlloc::copyCStr(StringRef Str) {
char *buf = IdxCtx.StrScratch.Allocate<char>(Str.size() + 1);
- std::uninitialized_copy(Str.begin(), Str.end(), buf);
+ llvm::uninitialized_copy(Str, buf);
buf[Str.size()] = '\0';
return buf;
}
diff --git a/lldb/source/Utility/Checksum.cpp b/lldb/source/Utility/Checksum.cpp
index 8943b4e128520..343785ed33954 100644
--- a/lldb/source/Utility/Checksum.cpp
+++ b/lldb/source/Utility/Checksum.cpp
@@ -21,10 +21,7 @@ Checksum &Checksum::operator=(const Checksum &checksum) {
return *this;
}
-void Checksum::SetMD5(llvm::MD5::MD5Result md5) {
- const constexpr size_t md5_length = 16;
- std::uninitialized_copy_n(md5.begin(), md5_length, m_checksum.begin());
-}
+void Checksum::SetMD5(llvm::MD5::MD5Result md5) { m_checksum = md5; }
Checksum::operator bool() const { return !llvm::equal(m_checksum, g_sentinel); }
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index 1f2433b9a7667..4819c88471345 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -190,7 +190,7 @@ namespace llvm {
// copy - Allocate copy in Allocator and return ArrayRef<T> to it.
template <typename Allocator> MutableArrayRef<T> copy(Allocator &A) {
T *Buff = A.template Allocate<T>(Length);
- std::uninitialized_copy(begin(), end(), Buff);
+ llvm::uninitialized_copy(*this, Buff);
return MutableArrayRef<T>(Buff, Length);
}
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index dc0443c9244be..8926489faf391 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -2038,6 +2038,11 @@ template <typename R1, typename R2> auto mismatch(R1 &&Range1, R2 &&Range2) {
adl_end(Range2));
}
+template <typename R, typename IterTy>
+auto uninitialized_copy(R &&Src, IterTy Dst) {
+ return std::uninitialized_copy(adl_begin(Src), adl_end(Src), Dst);
+}
+
template <typename R>
void stable_sort(R &&Range) {
std::stable_sort(adl_begin(Range), adl_end(Range));
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 43d200f1153d0..ac69ad598a65a 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2981,7 +2981,7 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ llvm::uninitialized_copy(Ops, O);
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
@@ -3004,7 +3004,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ llvm::uninitialized_copy(Ops, O);
S = new (SCEVAllocator)
SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
UniqueSCEVs.InsertNode(S, IP);
@@ -3027,7 +3027,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ llvm::uninitialized_copy(Ops, O);
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
@@ -3932,7 +3932,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
if (ExistingSCEV)
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ llvm::uninitialized_copy(Ops, O);
SCEV *S = new (SCEVAllocator)
SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
@@ -4319,7 +4319,7 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
- std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ llvm::uninitialized_copy(Ops, O);
SCEV *S = new (SCEVAllocator)
SCEVSequentialMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 4074ed65885c7..b533731e8dda3 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -542,8 +542,7 @@ class BitcodeConstant final : public Value,
: Value(Ty, SubclassID), Opcode(Info.Opcode), Flags(Info.Flags),
NumOperands(OpIDs.size()), BlockAddressBB(Info.BlockAddressBB),
SrcElemTy(Info.SrcElemTy), InRange(Info.InRange) {
- std::uninitialized_copy(OpIDs.begin(), OpIDs.end(),
- getTrailingObjects<unsigned>());
+ llvm::uninitialized_copy(OpIDs, getTrailingObjects<unsigned>());
}
BitcodeConstant &operator=(const BitcodeConstant &) = delete;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 55a3bfa459c3c..08b58669b3eb3 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -340,8 +340,8 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
return createStringError(
errc::invalid_argument,
"failed to parse file entry because the MD5 hash is invalid");
- std::uninitialized_copy_n(Value.getAsBlock()->begin(), 16,
- FileEntry.Checksum.begin());
+ llvm::uninitialized_copy(*Value.getAsBlock(),
+ FileEntry.Checksum.begin());
break;
default:
break;
diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index ed2d14dd79e45..bb3411bb9568e 100644
--- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -286,8 +286,7 @@ Expected<MSFLayout> MSFBuilder::generateLayout() {
SB->NumBlocks = FreeBlocks.size();
ulittle32_t *DirBlocks = Allocator.Allocate<ulittle32_t>(NumDirectoryBlocks);
- std::uninitialized_copy_n(DirectoryBlocks.begin(), NumDirectoryBlocks,
- DirBlocks);
+ llvm::uninitialized_copy(DirectoryBlocks, DirBlocks);
L.DirectoryBlocks = ArrayRef<ulittle32_t>(DirBlocks, NumDirectoryBlocks);
// The stream sizes should be re-allocated as a stable pointer and the stream
@@ -300,8 +299,7 @@ Expected<MSFLayout> MSFBuilder::generateLayout() {
Sizes[I] = StreamData[I].first;
ulittle32_t *BlockList =
Allocator.Allocate<ulittle32_t>(StreamData[I].second.size());
- std::uninitialized_copy_n(StreamData[I].second.begin(),
- StreamData[I].second.size(), BlockList);
+ llvm::uninitialized_copy(StreamData[I].second, BlockList);
L.StreamMap[I] =
ArrayRef<ulittle32_t>(BlockList, StreamData[I].second.size());
}
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 59cc489ade40d..98d1bad7680ab 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -258,7 +258,7 @@ class ConstantRangeListAttributeImpl final
: EnumAttributeImpl(ConstantRangeListAttrEntry, Kind), Size(Val.size()) {
assert(Size > 0);
ConstantRange *TrailingCR = getTrailingObjects<ConstantRange>();
- std::uninitialized_copy(Val.begin(), Val.end(), TrailingCR);
+ llvm::uninitialized_copy(Val, TrailingCR);
}
~ConstantRangeListAttributeImpl() {
diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
index 44cdfbdd80ea5..b27155162be6b 100644
--- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
@@ -86,7 +86,7 @@ std::pair<size_t, MutableArrayRef<T>>
BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
size_t Num = std::distance(Range.begin(), Range.end());
MutableArrayRef<T> Array(Temporaries.Allocate<T>(Num), Num);
- std::uninitialized_copy(Range.begin(), Range.end(), Array.begin());
+ llvm::uninitialized_copy(Range, Array.begin());
return {allocateArray(Array), Array};
}
diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp
index 419bf67407684..977e4ca8c26ef 100644
--- a/llvm/lib/Support/FoldingSet.cpp
+++ b/llvm/lib/Support/FoldingSet.cpp
@@ -131,7 +131,7 @@ bool FoldingSetNodeID::operator<(FoldingSetNodeIDRef RHS) const {
FoldingSetNodeIDRef
FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
- std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+ llvm::uninitialized_copy(Bits, New);
return FoldingSetNodeIDRef(New, Bits.size());
}
@@ -142,7 +142,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
/// singly-linked-list. In order to make deletion more efficient, we make
/// the list circular, so we can delete a node without computing its hash.
/// The problem with this is that the start of the hash buckets are not
-/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
/// use GetBucketPtr when this happens.
static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) {
// The low bit is set if this is the pointer back to the bucket.
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 67f237cb7b634..f3d54e6083e48 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -240,8 +240,7 @@ static void ProfileRecordRecTy(FoldingSetNodeID &ID,
RecordRecTy::RecordRecTy(RecordKeeper &RK, ArrayRef<const Record *> Classes)
: RecTy(RecordRecTyKind, RK), NumClasses(Classes.size()) {
- std::uninitialized_copy(Classes.begin(), Classes.end(),
- getTrailingObjects<const Record *>());
+ llvm::uninitialized_copy(Classes, getTrailingObjects<const Record *>());
}
const RecordRecTy *RecordRecTy::get(RecordKeeper &RK,
@@ -474,8 +473,7 @@ static void ProfileBitsInit(FoldingSetNodeID &ID,
BitsInit::BitsInit(RecordKeeper &RK, ArrayRef<const Init *> Bits)
: TypedInit(IK_BitsInit, BitsRecTy::get(RK, Bits.size())),
NumBits(Bits.size()) {
- std::uninitialized_copy(Bits.begin(), Bits.end(),
- getTrailingObjects<const Init *>());
+ llvm::uninitialized_copy(Bits, getTrailingObjects<const Init *>());
}
BitsInit *BitsInit::get(RecordKeeper &RK, ArrayRef<const Init *> Bits) {
@@ -708,8 +706,7 @@ static void ProfileListInit(FoldingSetNodeID &ID, ArrayRef<const Init *> Range,
ListInit::ListInit(ArrayRef<const Init *> Elements, const RecTy *EltTy)
: TypedInit(IK_ListInit, ListRecTy::get(EltTy)),
NumValues(Elements.size()) {
- std::uninitialized_copy(Elements.begin(), Elements.end(),
- getTrailingObjects<const Init *>());
+ llvm::uninitialized_copy(Elements, getTrailingObjects<const Init *>());
}
const ListInit *ListInit::get(ArrayRef<const Init *> Elements,
@@ -2435,8 +2432,7 @@ VarDefInit::VarDefInit(SMLoc Loc, const Record *Class,
ArrayRef<const ArgumentInit *> Args)
: TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Loc(Loc), Class(Class),
NumArgs(Args.size()) {
- std::uninitialized_copy(Args.begin(), Args.end(),
- getTrailingObjects<const ArgumentInit *>());
+ llvm::uninitialized_copy(Args, getTrailingObjects<const ArgumentInit *>());
}
const VarDefInit *VarDefInit::get(SMLoc Loc, const Record *Class,
@@ -2621,9 +2617,8 @@ CondOpInit::CondOpInit(ArrayRef<const Init *> Conds,
ArrayRef<const Init *> Values, const RecTy *Type)
: TypedInit(IK_CondOpInit, Type), NumConds(Conds.size()), ValType(Type) {
auto *TrailingObjects = getTrailingObjects<const Init *>();
- std::uninitialized_copy(Conds.begin(), Conds.end(), TrailingObjects);
- std::uninitialized_copy(Values.begin(), Values.end(),
- TrailingObjects + NumConds);
+ llvm::uninitialized_copy(Conds, TrailingObjects);
+ llvm::uninitialized_copy(Values, TrailingObjects + NumConds);
}
void CondOpInit::Profile(FoldingSetNodeID &ID) const {
@@ -2757,10 +2752,8 @@ DagInit::DagInit(const Init *V, const StringInit *VN,
ArrayRef<const StringInit *> ArgNames)
: TypedInit(IK_DagInit, DagRecTy::get(V->getRecordKeeper())), Val(V),
ValName(VN), NumArgs(Args.size()) {
- std::uninitialized_copy(Args.begin(), Args.end(),
- getTrailingObjects<const Init *>());
- std::uninitialized_copy(ArgNames.begin(), ArgNames.end(),
- getTrailingObjects<const StringInit *>());
+ llvm::uninitialized_copy(Args, getTrailingObjects<const Init *>());
+ llvm::uninitialized_copy(ArgNames, getTrailingObjects<const StringInit *>());
}
const DagInit *DagInit::get(const Init *V, const StringInit *VN,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 678a7be1f2456..e5601dca17b6a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -36,7 +36,7 @@ AMDGPUMCExpr::AMDGPUMCExpr(VariantKind Kind, ArrayRef<const MCExpr *> Args,
// allocation (e.g., through SmallVector's grow).
RawArgs = static_cast<const MCExpr **>(
Ctx.allocate(sizeof(const MCExpr *) * Args.size()));
- std::uninitialized_copy(Args.begin(), Args.end(), RawArgs);
+ llvm::uninitialized_copy(Args, RawArgs);
this->Args = ArrayRef<const MCExpr *>(RawArgs, Args.size());
}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index c93568943e833..d855647095550 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -297,8 +297,7 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
GTM->NTypes = Types.size();
GTM->IsJumpTableCanonical = IsJumpTableCanonical;
GTM->IsExported = IsExported;
- std::uninitialized_copy(Types.begin(), Types.end(),
- GTM->getTrailingObjects<MDNode *>());
+ llvm::copy(Types, GTM->getTrailingObjects<MDNode *>());
return GTM;
}
@@ -330,8 +329,7 @@ struct ICallBranchFunnel final
Call->CI = CI;
Call->UniqueId = UniqueId;
Call->NTargets = Targets.size();
- std::uninitialized_copy(Targets.begin(), Targets.end(),
- Call->getTrailingObjects<GlobalTypeMember *>());
+ llvm::copy(Targets, Call->getTrailingObjects<GlobalTypeMember *>());
return Call;
}
diff --git a/llvm/unittests/Support/TrailingObjectsTest.cpp b/llvm/unittests/Support/TrailingObjectsTest.cpp
index e2656b2229ca6..e36979e75d7f7 100644
--- a/llvm/unittests/Support/TrailingObjectsTest.cpp
+++ b/llvm/unittests/Support/TrailingObjectsTest.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/TrailingObjects.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "gtest/gtest.h"
using namespace llvm;
@@ -23,17 +25,16 @@ class Class1 final : protected TrailingObjects<Class1, short> {
protected:
size_t numTrailingObjects(OverloadToken<short>) const { return NumShorts; }
- Class1(int *ShortArray, unsigned NumShorts) : NumShorts(NumShorts) {
- std::uninitialized_copy(ShortArray, ShortArray + NumShorts,
- getTrailingObjects<short>());
+ Class1(ArrayRef<int> ShortArray) : NumShorts(ShortArray.size()) {
+ llvm::copy(ShortArray, getTrailingObjects<short>());
}
public:
- static Class1 *create(int *ShortArray, unsigned NumShorts) {
- void *Mem = ::operator new(totalSizeToAlloc<short>(NumShorts));
- return new (Mem) Class1(ShortArray, NumShorts);
+ static Class1 *create(ArrayRef<int> ShortArray) {
+ void *Mem = ::operator new(totalSizeToAlloc<short>(ShortArray.size()));
+ return new (Mem) Class1(ShortArray);
}
- void operator delete(void *p) { ::operator delete(p); }
+ void operator delete(void *Ptr) { ::operator delete(Ptr); }
short get(unsigned Num) const { return getTrailingObjects<short>()[Num]; }
@@ -81,7 +82,7 @@ class Class2 final : protected TrailingObjects<Class2, double, short> {
*C->getTrailingObjects<double>() = D;
return C;
}
- void operator delete(void *p) { ::operator delete(p); }
+ void operator delete(void *Ptr) { ::operator delete(Ptr); }
short getShort() const {
if (!HasShort)
@@ -106,7 +107,7 @@ class Class2 final : protected TrailingObjects<Class2, double, short> {
TEST(TrailingObjects, OneArg) {
int arr[] = {1, 2, 3};
- Class1 *C = Class1::create(arr, 3);
+ Class1 *C = Class1::create(arr);
EXPECT_EQ(sizeof(Class1), sizeof(unsigned));
EXPECT_EQ(Class1::additionalSizeToAlloc<short>(1), sizeof(short));
EXPECT_EQ(Class1::additionalSizeToAlloc<short>(3), sizeof(short) * 3);
diff --git a/mlir/include/mlir/IR/BuiltinAttributes.td b/mlir/include/mlir/IR/BuiltinAttributes.td
index 854a24ab8605c..8855908276500 100644
--- a/mlir/include/mlir/IR/BuiltinAttributes.td
+++ b/mlir/include/mlir/IR/BuiltinAttributes.td
@@ -158,7 +158,7 @@ def Builtin_DenseArrayRawDataParameter : ArrayRefParameter<
if (!$_self.empty()) {
auto *alloc = static_cast<char *>(
$_allocator.allocate($_self.size(), alignof(uint64_t)));
- std::uninitialized_copy($_self.begin(), $_self.end(), alloc);
+ llvm::uninitialized_copy($_self, alloc);
$_dst = ArrayRef<char>(alloc, $_self.size());
}
}];
diff --git a/mlir/include/mlir/Support/StorageUniquer.h b/mlir/include/mlir/Support/StorageUniquer.h
index 26bdf09abba21..6756c4390276f 100644
--- a/mlir/include/mlir/Support/StorageUniquer.h
+++ b/mlir/include/mlir/Support/StorageUniquer.h
@@ -99,7 +99,7 @@ class StorageUniquer {
if (elements.empty())
return std::nullopt;
auto result = allocator.Allocate<T>(elements.size());
- std::uninitialized_copy(elements.begin(), elements.end(), result);
+ llvm::uninitialized_copy(elements, result);
return ArrayRef<T>(result, elements.size());
}
@@ -110,7 +110,7 @@ class StorageUniquer {
return StringRef();
char *result = allocator.Allocate<char>(str.size() + 1);
- std::uninitialized_copy(str.begin(), str.end(), result);
+ llvm::uninitialized_copy(str, result);
result[str.size()] = 0;
return StringRef(result, str.size());
}
diff --git a/mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp b/mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp
index be13a89c7ab4f..c158b98381a82 100644
--- a/mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp
@@ -28,7 +28,7 @@ NestedMatch NestedMatch::build(Operation *operation,
ArrayRef<NestedMatch> nestedMatches) {
auto *result = allocator()->Allocate<NestedMatch>();
auto *children = allocator()->Allocate<NestedMatch>(nestedMatches.size());
- std::uninitialized_copy(nestedMatches.begin(), nestedMatches.end(), children);
+ llvm::uninitialized_copy(nestedMatches, children);
new (result) NestedMatch();
result->matchedOperation = operation;
result->matchedChildren =
@@ -46,7 +46,7 @@ void NestedPattern::copyNestedToThis(ArrayRef<NestedPattern> nested) {
return;
auto *newNested = allocator()->Allocate<NestedPattern>(nested.size());
- std::uninitialized_copy(nested.begin(), nested.end(), newNested);
+ llvm::uninitialized_copy(nested, newNested);
nestedPatterns = ArrayRef<NestedPattern>(newNested, nested.size());
}
diff --git a/mlir/lib/IR/AffineMapDetail.h b/mlir/lib/IR/AffineMapDetail.h
index 732c7fd1d3a12..32c9734f23a36 100644
--- a/mlir/lib/IR/AffineMapDetail.h
+++ b/mlir/lib/IR/AffineMapDetail.h
@@ -56,8 +56,7 @@ struct AffineMapStorage final
res->numDims = std::get<0>(key);
res->numSymbols = std::get<1>(key);
res->numResults = results.size();
- std::uninitialized_copy(results.begin(), results.end(),
- res->getTrailingObjects<AffineExpr>());
+ llvm::uninitialized_copy(results, res->getTrailingObjects<AffineExpr>());
return res;
}
};
diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp
index 506a6c1fc16fb..8ae33022be24f 100644
--- a/mlir/lib/IR/Location.cpp
+++ b/mlir/lib/IR/Location.cpp
@@ -58,11 +58,11 @@ struct FileLineColRangeAttrStorage final
auto *result = ::new (rawMem) FileLineColRangeAttrStorage(
std::move(std::get<0>(tblgenKey)), locEnc - 1);
if (numInArray > 0) {
- result->startLine = std::get<1>(tblgenKey)[0];
+ ArrayRef<unsigned> elements = std::get<1>(tblgenKey);
+ result->startLine = elements[0];
// Copy in the element types into the trailing storage.
- std::uninitialized_copy(std::next(std::get<1>(tblgenKey).begin()),
- std::get<1>(tblgenKey).end(),
- result->getTrailingObjects<unsigned>());
+ llvm::uninitialized_copy(elements.drop_front(),
+ result->getTrailingObjects<unsigned>());
}
return result;
}
diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp
index 87782e84dd6e4..d43dcc5a5e2fd 100644
--- a/mlir/lib/IR/MLIRContext.cpp
+++ b/mlir/lib/IR/MLIRContext.cpp
@@ -362,7 +362,7 @@ template <typename T>
static ArrayRef<T> copyArrayRefInto(llvm::BumpPtrAllocator &allocator,
ArrayRef<T> elements) {
auto result = allocator.Allocate<T>(elements.size());
- std::uninitialized_copy(elements.begin(), elements.end(), result);
+ llvm::uninitialized_copy(elements, result);
return ArrayRef<T>(result, elements.size());
}
diff --git a/mlir/lib/IR/TypeDetail.h b/mlir/lib/IR/TypeDetail.h
index 1d65fccb82b8e..19f3690c3d2dc 100644
--- a/mlir/lib/IR/TypeDetail.h
+++ b/mlir/lib/IR/TypeDetail.h
@@ -116,8 +116,7 @@ struct TupleTypeStorage final
auto *result = ::new (rawMem) TupleTypeStorage(key.size());
// Copy in the element types into the trailing storage.
- std::uninitialized_copy(key.begin(), key.end(),
- result->getTrailingObjects<Type>());
+ llvm::uninitialized_copy(key, result->getTrailingObjects<Type>());
return result;
}
diff --git a/mlir/lib/Tools/PDLL/AST/Nodes.cpp b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
index ee2fe0fb9e3c3..159ce6235662b 100644
--- a/mlir/lib/Tools/PDLL/AST/Nodes.cpp
+++ b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
@@ -195,8 +195,7 @@ CompoundStmt *CompoundStmt::create(Context &ctx, SMRange loc,
void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(CompoundStmt));
CompoundStmt *stmt = new (rawData) CompoundStmt(loc, children.size());
- std::uninitialized_copy(children.begin(), children.end(),
- stmt->getChildren().begin());
+ llvm::uninitialized_copy(children, stmt->getChildren().begin());
return stmt;
}
@@ -230,8 +229,7 @@ ReplaceStmt *ReplaceStmt::create(Context &ctx, SMRange loc, Expr *rootOp,
void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(ReplaceStmt));
ReplaceStmt *stmt = new (rawData) ReplaceStmt(loc, rootOp, replExprs.size());
- std::uninitialized_copy(replExprs.begin(), replExprs.end(),
- stmt->getReplExprs().begin());
+ llvm::uninitialized_copy(replExprs, stmt->getReplExprs().begin());
return stmt;
}
@@ -276,8 +274,7 @@ CallExpr *CallExpr::create(Context &ctx, SMRange loc, Expr *callable,
CallExpr *expr = new (rawData)
CallExpr(loc, resultType, callable, arguments.size(), isNegated);
- std::uninitialized_copy(arguments.begin(), arguments.end(),
- expr->getArguments().begin());
+ llvm::uninitialized_copy(arguments, expr->getArguments().begin());
return expr;
}
@@ -321,12 +318,9 @@ OperationExpr::create(Context &ctx, SMRange loc, const ods::Operation *odsOp,
OperationExpr *opExpr = new (rawData)
OperationExpr(loc, resultType, name, operands.size(), resultTypes.size(),
attributes.size(), name->getLoc());
- std::uninitialized_copy(operands.begin(), operands.end(),
- opExpr->getOperands().begin());
- std::uninitialized_copy(resultTypes.begin(), resultTypes.end(),
- opExpr->getResultTypes().begin());
- std::uninitialized_copy(attributes.begin(), attributes.end(),
- opExpr->getAttributes().begin());
+ llvm::uninitialized_copy(operands, opExpr->getOperands().begin());
+ llvm::uninitialized_copy(resultTypes, opExpr->getResultTypes().begin());
+ llvm::uninitialized_copy(attributes, opExpr->getAttributes().begin());
return opExpr;
}
@@ -344,8 +338,7 @@ RangeExpr *RangeExpr::create(Context &ctx, SMRange loc,
void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(TupleExpr));
RangeExpr *expr = new (rawData) RangeExpr(loc, type, elements.size());
- std::uninitialized_copy(elements.begin(), elements.end(),
- expr->getElements().begin());
+ llvm::uninitialized_copy(elements, expr->getElements().begin());
return expr;
}
@@ -364,8 +357,7 @@ TupleExpr *TupleExpr::create(Context &ctx, SMRange loc,
TupleType type = TupleType::get(ctx, llvm::to_vector(elementTypes), names);
TupleExpr *expr = new (rawData) TupleExpr(loc, type);
- std::uninitialized_copy(elements.begin(), elements.end(),
- expr->getElements().begin());
+ llvm::uninitialized_copy(elements, expr->getElements().begin());
return expr;
}
@@ -482,10 +474,8 @@ UserConstraintDecl *UserConstraintDecl::createImpl(
UserConstraintDecl *decl = new (rawData)
UserConstraintDecl(name, inputs.size(), hasNativeInputTypes,
results.size(), codeBlock, body, resultType);
- std::uninitialized_copy(inputs.begin(), inputs.end(),
- decl->getInputs().begin());
- std::uninitialized_copy(results.begin(), results.end(),
- decl->getResults().begin());
+ llvm::uninitialized_copy(inputs, decl->getInputs().begin());
+ llvm::uninitialized_copy(results, decl->getResults().begin());
if (hasNativeInputTypes) {
StringRef *nativeInputTypesPtr = decl->getTrailingObjects<StringRef>();
for (unsigned i = 0, e = inputs.size(); i < e; ++i)
@@ -547,10 +537,8 @@ UserRewriteDecl *UserRewriteDecl::createImpl(Context &ctx, const Name &name,
UserRewriteDecl *decl = new (rawData) UserRewriteDecl(
name, inputs.size(), results.size(), codeBlock, body, resultType);
- std::uninitialized_copy(inputs.begin(), inputs.end(),
- decl->getInputs().begin());
- std::uninitialized_copy(results.begin(), results.end(),
- decl->getResults().begin());
+ llvm::uninitialized_copy(inputs, decl->getInputs().begin());
+ llvm::uninitialized_copy(results, decl->getResults().begin());
return decl;
}
@@ -567,8 +555,7 @@ VariableDecl *VariableDecl::create(Context &ctx, const Name &name, Type type,
VariableDecl *varDecl =
new (rawData) VariableDecl(name, type, initExpr, constraints.size());
- std::uninitialized_copy(constraints.begin(), constraints.end(),
- varDecl->getConstraints().begin());
+ llvm::uninitialized_copy(constraints, varDecl->getConstraints().begin());
return varDecl;
}
@@ -581,7 +568,6 @@ Module *Module::create(Context &ctx, SMLoc loc, ArrayRef<Decl *> children) {
void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(Module));
Module *module = new (rawData) Module(loc, children.size());
- std::uninitialized_copy(children.begin(), children.end(),
- module->getChildren().begin());
+ llvm::uninitialized_copy(children, module->getChildren().begin());
return module;
}
>From 482e9b06d84ef230f8fe2d0b25ae91d38896e697 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 04:34:07 +0000
Subject: [PATCH 075/115] [JITLink][i386] Drop EdgeKind_i386 qualification when
using enum values.
We don't need to explicitly qualify these values: EdgeKind_i386 is an unscoped
enum declared in the i386 namespace, so its enumerators are already visible as
i386::<Value>.
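For illustration only, a minimal sketch (hypothetical names, not code from this
patch) of the two equivalent spellings for an unscoped enum at namespace scope:

  namespace demo {
  // Unscoped enum: its enumerators are injected into the enclosing namespace.
  enum DemoKind : int { Pointer32, PCRel32 };
  } // namespace demo

  int main() {
    // Both spellings name the same enumerator; the enum-name qualifier is
    // redundant, which is what this patch drops.
    demo::DemoKind A = demo::DemoKind::Pointer32;
    demo::DemoKind B = demo::Pointer32;
    return A == B ? 0 : 1;
  }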
---
llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp | 39 +++++++++----------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
index b14b87232e24d..3bde1f1c40a94 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -115,27 +115,26 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
using ELFT = object::ELF32LE;
Expected<i386::EdgeKind_i386> getRelocationKind(const uint32_t Type) {
- using namespace i386;
switch (Type) {
case ELF::R_386_32:
- return EdgeKind_i386::Pointer32;
+ return i386::Pointer32;
case ELF::R_386_PC32:
- return EdgeKind_i386::PCRel32;
+ return i386::PCRel32;
case ELF::R_386_16:
- return EdgeKind_i386::Pointer16;
+ return i386::Pointer16;
case ELF::R_386_PC16:
- return EdgeKind_i386::PCRel16;
+ return i386::PCRel16;
case ELF::R_386_GOT32:
- return EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT;
+ return i386::RequestGOTAndTransformToDelta32FromGOT;
case ELF::R_386_GOT32X:
// TODO: Add a relaxable edge kind and update relaxation optimization.
- return EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT;
+ return i386::RequestGOTAndTransformToDelta32FromGOT;
case ELF::R_386_GOTPC:
- return EdgeKind_i386::Delta32;
+ return i386::Delta32;
case ELF::R_386_GOTOFF:
- return EdgeKind_i386::Delta32FromGOT;
+ return i386::Delta32FromGOT;
case ELF::R_386_PLT32:
- return EdgeKind_i386::BranchPCRel32;
+ return i386::BranchPCRel32;
}
return make_error<JITLinkError>(
@@ -196,21 +195,21 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
int64_t Addend = 0;
switch (*Kind) {
- case i386::EdgeKind_i386::Pointer32:
- case i386::EdgeKind_i386::PCRel32:
- case i386::EdgeKind_i386::RequestGOTAndTransformToDelta32FromGOT:
- case i386::EdgeKind_i386::Delta32:
- case i386::EdgeKind_i386::Delta32FromGOT:
- case i386::EdgeKind_i386::BranchPCRel32:
- case i386::EdgeKind_i386::BranchPCRel32ToPtrJumpStub:
- case i386::EdgeKind_i386::BranchPCRel32ToPtrJumpStubBypassable: {
+ case i386::Pointer32:
+ case i386::PCRel32:
+ case i386::RequestGOTAndTransformToDelta32FromGOT:
+ case i386::Delta32:
+ case i386::Delta32FromGOT:
+ case i386::BranchPCRel32:
+ case i386::BranchPCRel32ToPtrJumpStub:
+ case i386::BranchPCRel32ToPtrJumpStubBypassable: {
const char *FixupContent = BlockToFix.getContent().data() +
(FixupAddress - BlockToFix.getAddress());
Addend = *(const support::little32_t *)FixupContent;
break;
}
- case i386::EdgeKind_i386::Pointer16:
- case i386::EdgeKind_i386::PCRel16: {
+ case i386::Pointer16:
+ case i386::PCRel16: {
const char *FixupContent = BlockToFix.getContent().data() +
(FixupAddress - BlockToFix.getAddress());
Addend = *(const support::little16_t *)FixupContent;
>From e0e3d05a2e048c95b6eaa4b08f41b4c7ac66a023 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 7 May 2025 17:38:11 -0700
Subject: [PATCH 076/115] [RISCV] Fix the link to the XAndesPerf specification.
NFC (#138804)
We need to use two underscores after the URL, like the other specification
links: in reStructuredText a trailing double underscore makes the hyperlink
reference anonymous, so it does not create a named target that could clash
with other links reusing the same text.
---
llvm/docs/RISCVUsage.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index d0689b779f551..4b98f58304f13 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -506,7 +506,7 @@ The current vendor extensions supported are:
LLVM implements `version 0.1 of the Rivos Vector Register Zips extension specification <https://github.com/rivosinc/rivos-custom-extensions>`__.
``XAndesPerf``
- LLVM implements `version 5.0.0 of the Andes Performance Extension specification <https://github.com/andestech/andes-v5-isa/releases/download/ast-v5_4_0-release/AndeStar_V5_ISA_Spec_UM165-v1.5.08-20250317.pdf>` by Andes Technology. All instructions are prefixed with `nds.` as described in the specification.
+ LLVM implements `version 5.0.0 of the Andes Performance Extension specification <https://github.com/andestech/andes-v5-isa/releases/download/ast-v5_4_0-release/AndeStar_V5_ISA_Spec_UM165-v1.5.08-20250317.pdf>`__ by Andes Technology. All instructions are prefixed with `nds.` as described in the specification.
Experimental C Intrinsics
=========================
>From c7f350f1428df14e3114977b830ab4dcd3008983 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 04:46:20 +0000
Subject: [PATCH 077/115] [JITLink][i386] Remove more unnecessary enum value
qualifications.
---
.../llvm/ExecutionEngine/JITLink/i386.h | 29 +++++++++----------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
index 629e0d8a18729..3900fdc8ec7b1 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
@@ -181,7 +181,6 @@ const char *getEdgeKindName(Edge::Kind K);
/// Apply fixup expression for edge to block content.
inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
const Symbol *GOTSymbol) {
- using namespace i386;
using namespace llvm::support;
char *BlockWorkingMem = B.getAlreadyMutableContent().data();
@@ -189,19 +188,19 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
auto FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
- case i386::Pointer32: {
+ case Pointer32: {
uint32_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
*(ulittle32_t *)FixupPtr = Value;
break;
}
- case i386::PCRel32: {
+ case PCRel32: {
int32_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
*(little32_t *)FixupPtr = Value;
break;
}
- case i386::Pointer16: {
+ case Pointer16: {
uint32_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
if (LLVM_LIKELY(isUInt<16>(Value)))
*(ulittle16_t *)FixupPtr = Value;
@@ -210,7 +209,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
- case i386::PCRel16: {
+ case PCRel16: {
int32_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
if (LLVM_LIKELY(isInt<16>(Value)))
*(little16_t *)FixupPtr = Value;
@@ -219,13 +218,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
- case i386::Delta32: {
+ case Delta32: {
int32_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
*(little32_t *)FixupPtr = Value;
break;
}
- case i386::Delta32FromGOT: {
+ case Delta32FromGOT: {
assert(GOTSymbol && "No GOT section symbol");
int32_t Value =
E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend();
@@ -233,9 +232,9 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
- case i386::BranchPCRel32:
- case i386::BranchPCRel32ToPtrJumpStub:
- case i386::BranchPCRel32ToPtrJumpStubBypassable: {
+ case BranchPCRel32:
+ case BranchPCRel32ToPtrJumpStub:
+ case BranchPCRel32ToPtrJumpStubBypassable: {
int32_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
*(little32_t *)FixupPtr = Value;
break;
@@ -321,14 +320,14 @@ class GOTTableManager : public TableManager<GOTTableManager> {
bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
Edge::Kind KindToSet = Edge::Invalid;
switch (E.getKind()) {
- case i386::Delta32FromGOT: {
+ case Delta32FromGOT: {
// we need to make sure that the GOT section exists, but don't otherwise
// need to fix up this edge
getGOTSection(G);
return false;
}
- case i386::RequestGOTAndTransformToDelta32FromGOT:
- KindToSet = i386::Delta32FromGOT;
+ case RequestGOTAndTransformToDelta32FromGOT:
+ KindToSet = Delta32FromGOT;
break;
default:
return false;
@@ -367,7 +366,7 @@ class PLTTableManager : public TableManager<PLTTableManager> {
static StringRef getSectionName() { return "$__STUBS"; }
bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
- if (E.getKind() == i386::BranchPCRel32 && !E.getTarget().isDefined()) {
+ if (E.getKind() == BranchPCRel32 && !E.getTarget().isDefined()) {
DEBUG_WITH_TYPE("jitlink", {
dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
<< B->getFixupAddress(E) << " (" << B->getAddress() << " + "
@@ -375,7 +374,7 @@ class PLTTableManager : public TableManager<PLTTableManager> {
});
// Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to
// be optimized when the target is in-range.
- E.setKind(i386::BranchPCRel32ToPtrJumpStubBypassable);
+ E.setKind(BranchPCRel32ToPtrJumpStubBypassable);
E.setTarget(getEntryForTarget(G, E.getTarget()));
return true;
}
>From b972164f38133fbc878275f4ae324908ae14d750 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at gmail.com>
Date: Wed, 7 May 2025 04:59:20 +0000
Subject: [PATCH 078/115] [JITLink] Rename 'i386' namespace and files to 'x86'.
When building on i386, both clang and gcc define a builtin 'i386' macro (see
discussion in https://github.com/llvm/llvm-project/pull/137063). This causes
build errors in the JITLink/i386 backend when attempting to build LLVM on i386.
This commit renames the 'i386' backend (namespaces, APIs and files) to 'x86' to
avoid this issue.
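For illustration only (hypothetical snippet, not part of the patch), a minimal
sketch of the failure mode, assuming a GNU-dialect build on a 32-bit x86 host
where 'i386' is predefined as a macro expanding to 1:

  // sketch.cpp -- compile with e.g. 'g++ -m32 -std=gnu++17 -c sketch.cpp'.
  // In GNU dialects targeting 32-bit x86 the preprocessor predefines the
  // object-like macro 'i386', so the namespace name below is rewritten to
  // '1' before parsing and the file fails to compile.
  namespace jitlink {
  namespace i386 {                    // becomes 'namespace 1 {' -> error
  enum EdgeKind : int { Pointer32 };
  } // namespace i386
  } // namespace jitlink

  // Renaming the namespace (this patch uses 'x86') sidesteps the macro.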
---
.../JITLink/{ELF_i386.h => ELF_x86.h} | 22 ++--
.../ExecutionEngine/JITLink/{i386.h => x86.h} | 27 ++---
.../ExecutionEngine/JITLink/CMakeLists.txt | 4 +-
llvm/lib/ExecutionEngine/JITLink/ELF.cpp | 6 +-
.../JITLink/{ELF_i386.cpp => ELF_x86.cpp} | 110 +++++++++---------
llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 6 +-
.../JITLink/{i386.cpp => x86.cpp} | 14 +--
.../ELF_external_to_absolute_conversion.s | 0
.../ELF_x86_absolute_relocations_16.s} | 0
.../ELF_x86_absolute_relocations_32.s} | 0
.../ELF_x86_minimal.s} | 0
.../ELF_x86_pc_relative_relocations_32.s} | 0
.../ELF_x86_small_pic_relocations_got.s} | 0
.../ELF_x86_small_pic_relocations_plt.s} | 2 +-
.../JITLink/{i386 => x86}/lit.local.cfg | 0
15 files changed, 96 insertions(+), 95 deletions(-)
rename llvm/include/llvm/ExecutionEngine/JITLink/{ELF_i386.h => ELF_x86.h} (56%)
rename llvm/include/llvm/ExecutionEngine/JITLink/{i386.h => x86.h} (95%)
rename llvm/lib/ExecutionEngine/JITLink/{ELF_i386.cpp => ELF_x86.cpp} (70%)
rename llvm/lib/ExecutionEngine/JITLink/{i386.cpp => x86.cpp} (87%)
rename llvm/test/ExecutionEngine/JITLink/{i386 => x86}/ELF_external_to_absolute_conversion.s (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_absolute_relocations_16.s => x86/ELF_x86_absolute_relocations_16.s} (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_absolute_relocations_32.s => x86/ELF_x86_absolute_relocations_32.s} (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_minimal.s => x86/ELF_x86_minimal.s} (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_pc_relative_relocations_32.s => x86/ELF_x86_pc_relative_relocations_32.s} (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_small_pic_relocations_got.s => x86/ELF_x86_small_pic_relocations_got.s} (100%)
rename llvm/test/ExecutionEngine/JITLink/{i386/ELF_i386_small_pic_relocations_plt.s => x86/ELF_x86_small_pic_relocations_plt.s} (95%)
rename llvm/test/ExecutionEngine/JITLink/{i386 => x86}/lit.local.cfg (100%)
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86.h
similarity index 56%
rename from llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h
rename to llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86.h
index 0752f214d9d58..82151f93a00bc 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86.h
@@ -1,4 +1,4 @@
-//===--- ELF_i386.h - JIT link functions for ELF/i386 --*- C++ -*----===//
+//===------- ELF_x86.h - JIT link functions for ELF/x86 ------*- C++ -*----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -8,33 +8,33 @@
//
//===----------------------------------------------------------------------===//
//
-// jit-link functions for ELF/i386.
+// jit-link functions for ELF/x86.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H
-#define LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_X86_H
+#define LLVM_EXECUTIONENGINE_JITLINK_ELF_X86_H
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
namespace llvm {
namespace jitlink {
-/// Create a LinkGraph from an ELF/i386 relocatable object
+/// Create a LinkGraph from an ELF/x86 relocatable object
///
/// Note: The graph does not take ownership of the underlying buffer, nor copy
/// its contents. The caller is responsible for ensuring that the object buffer
/// outlives the graph.
Expected<std::unique_ptr<LinkGraph>>
-createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer,
- std::shared_ptr<orc::SymbolStringPool> SSP);
+createLinkGraphFromELFObject_x86(MemoryBufferRef ObjectBuffer,
+ std::shared_ptr<orc::SymbolStringPool> SSP);
-/// jit-link the given object buffer, which must be a ELF i386 relocatable
+/// jit-link the given object buffer, which must be an ELF x86 relocatable
/// object file.
-void link_ELF_i386(std::unique_ptr<LinkGraph> G,
- std::unique_ptr<JITLinkContext> Ctx);
+void link_ELF_x86(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
} // end namespace jitlink
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_I386_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_X86_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86.h
similarity index 95%
rename from llvm/include/llvm/ExecutionEngine/JITLink/i386.h
rename to llvm/include/llvm/ExecutionEngine/JITLink/x86.h
index 3900fdc8ec7b1..eb372db88688e 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86.h
@@ -1,4 +1,4 @@
-//=== i386.h - Generic JITLink i386 edge kinds, utilities -*- C++ -*-===//
+//===----- x86.h - Generic JITLink x86 edge kinds, utilities ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,20 @@
//
//===----------------------------------------------------------------------===//
//
-// Generic utilities for graphs representing i386 objects.
+// Generic utilities for graphs representing x86 objects.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_JITLINK_I386_H
-#define LLVM_EXECUTIONENGINE_JITLINK_I386_H
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_X86_H
+#define LLVM_EXECUTIONENGINE_JITLINK_X86_H
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/TableManager.h"
-namespace llvm::jitlink::i386 {
-/// Represets i386 fixups
-enum EdgeKind_i386 : Edge::Kind {
+namespace llvm::jitlink::x86 {
+
+/// Represents x86 fixups
+enum EdgeKind_x86 : Edge::Kind {
/// A plain 32-bit pointer value relocation.
///
@@ -174,7 +175,7 @@ enum EdgeKind_i386 : Edge::Kind {
BranchPCRel32ToPtrJumpStubBypassable,
};
-/// Returns a string name for the given i386 edge. For debugging purposes
+/// Returns a string name for the given x86 edge. For debugging purposes
/// only
const char *getEdgeKindName(Edge::Kind K);
@@ -249,13 +250,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
return Error::success();
}
-/// i386 pointer size.
+/// x86 pointer size.
constexpr uint32_t PointerSize = 4;
-/// i386 null pointer content.
+/// x86 null pointer content.
extern const char NullPointerContent[PointerSize];
-/// i386 pointer jump stub content.
+/// x86 pointer jump stub content.
///
/// Contains the instruction sequence for an indirect jump via an in-memory
/// pointer:
@@ -406,6 +407,6 @@ class PLTTableManager : public TableManager<PLTTableManager> {
/// target
Error optimizeGOTAndStubAccesses(LinkGraph &G);
-} // namespace llvm::jitlink::i386
+} // namespace llvm::jitlink::x86
-#endif // LLVM_EXECUTIONENGINE_JITLINK_I386_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_X86_H
diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
index 22e4513e1374c..4669124ebe578 100644
--- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
@@ -23,10 +23,10 @@ add_llvm_component_library(LLVMJITLink
ELFLinkGraphBuilder.cpp
ELF_aarch32.cpp
ELF_aarch64.cpp
- ELF_i386.cpp
ELF_loongarch.cpp
ELF_ppc64.cpp
ELF_riscv.cpp
+ ELF_x86.cpp
ELF_x86_64.cpp
# COFF
@@ -43,10 +43,10 @@ add_llvm_component_library(LLVMJITLink
# Architectures:
aarch32.cpp
aarch64.cpp
- i386.cpp
loongarch.cpp
ppc64.cpp
riscv.cpp
+ x86.cpp
x86_64.cpp
ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index 663a883a4bcce..87e451715811f 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -15,10 +15,10 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h"
#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
-#include "llvm/ExecutionEngine/JITLink/ELF_i386.h"
#include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h"
#include "llvm/ExecutionEngine/JITLink/ELF_ppc64.h"
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_x86.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Object/ELF.h"
#include <cstring>
@@ -101,7 +101,7 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer,
case ELF::EM_X86_64:
return createLinkGraphFromELFObject_x86_64(ObjectBuffer, std::move(SSP));
case ELF::EM_386:
- return createLinkGraphFromELFObject_i386(ObjectBuffer, std::move(SSP));
+ return createLinkGraphFromELFObject_x86(ObjectBuffer, std::move(SSP));
default:
return make_error<JITLinkError>(
"Unsupported target machine architecture in ELF object " +
@@ -139,7 +139,7 @@ void link_ELF(std::unique_ptr<LinkGraph> G,
link_ELF_x86_64(std::move(G), std::move(Ctx));
return;
case Triple::x86:
- link_ELF_i386(std::move(G), std::move(Ctx));
+ link_ELF_x86(std::move(G), std::move(Ctx));
return;
default:
Ctx->notifyFailed(make_error<JITLinkError>(
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86.cpp
similarity index 70%
rename from llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
rename to llvm/lib/ExecutionEngine/JITLink/ELF_x86.cpp
index 3bde1f1c40a94..8e0f6fb91a607 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86.cpp
@@ -1,4 +1,4 @@
-//===----- ELF_i386.cpp - JIT linker implementation for ELF/i386 ----===//
+//===--------- ELF_x86.cpp - JIT linker implementation for ELF/x86 --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,16 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// ELF/i386 jit-link implementation.
+// ELF/x86 jit-link implementation.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/JITLink/ELF_i386.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_x86.h"
#include "DefineExternalSectionStartAndEndSymbols.h"
#include "ELFLinkGraphBuilder.h"
#include "JITLinkGeneric.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/ExecutionEngine/JITLink/i386.h"
+#include "llvm/ExecutionEngine/JITLink/x86.h"
#include "llvm/Object/ELFObjectFile.h"
#define DEBUG_TYPE "jitlink"
@@ -26,11 +26,11 @@ using namespace llvm::jitlink;
namespace {
constexpr StringRef ELFGOTSymbolName = "_GLOBAL_OFFSET_TABLE_";
-Error buildTables_ELF_i386(LinkGraph &G) {
+Error buildTables_ELF_x86(LinkGraph &G) {
LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
- i386::GOTTableManager GOT;
- i386::PLTTableManager PLT(GOT);
+ x86::GOTTableManager GOT;
+ x86::PLTTableManager PLT(GOT);
visitExistingEdges(G, GOT, PLT);
return Error::success();
}
@@ -38,12 +38,12 @@ Error buildTables_ELF_i386(LinkGraph &G) {
namespace llvm::jitlink {
-class ELFJITLinker_i386 : public JITLinker<ELFJITLinker_i386> {
- friend class JITLinker<ELFJITLinker_i386>;
+class ELFJITLinker_x86 : public JITLinker<ELFJITLinker_x86> {
+ friend class JITLinker<ELFJITLinker_x86>;
public:
- ELFJITLinker_i386(std::unique_ptr<JITLinkContext> Ctx,
- std::unique_ptr<LinkGraph> G, PassConfiguration PassConfig)
+ ELFJITLinker_x86(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G, PassConfiguration PassConfig)
: JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {
getPassConfig().PostAllocationPasses.push_back(
[this](LinkGraph &G) { return getOrCreateGOTSymbol(G); });
@@ -59,7 +59,7 @@ class ELFJITLinker_i386 : public JITLinker<ELFJITLinker_i386> {
if (Sym.getName() != nullptr &&
*Sym.getName() == ELFGOTSymbolName)
if (auto *GOTSection = G.findSectionByName(
- i386::GOTTableManager::getSectionName())) {
+ x86::GOTTableManager::getSectionName())) {
GOTSymbol = &Sym;
return {*GOTSection, true};
}
@@ -79,7 +79,7 @@ class ELFJITLinker_i386 : public JITLinker<ELFJITLinker_i386> {
// record it, otherwise we'll create our own.
// If there's a GOT section but we didn't find an external GOT symbol...
if (auto *GOTSection =
- G.findSectionByName(i386::GOTTableManager::getSectionName())) {
+ G.findSectionByName(x86::GOTTableManager::getSectionName())) {
// Check for an existing defined symbol.
for (auto *Sym : GOTSection->symbols())
@@ -106,52 +106,52 @@ class ELFJITLinker_i386 : public JITLinker<ELFJITLinker_i386> {
}
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- return i386::applyFixup(G, B, E, GOTSymbol);
+ return x86::applyFixup(G, B, E, GOTSymbol);
}
};
-class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
+class ELFLinkGraphBuilder_x86 : public ELFLinkGraphBuilder<object::ELF32LE> {
private:
using ELFT = object::ELF32LE;
- Expected<i386::EdgeKind_i386> getRelocationKind(const uint32_t Type) {
+ Expected<x86::EdgeKind_x86> getRelocationKind(const uint32_t Type) {
switch (Type) {
case ELF::R_386_32:
- return i386::Pointer32;
+ return x86::Pointer32;
case ELF::R_386_PC32:
- return i386::PCRel32;
+ return x86::PCRel32;
case ELF::R_386_16:
- return i386::Pointer16;
+ return x86::Pointer16;
case ELF::R_386_PC16:
- return i386::PCRel16;
+ return x86::PCRel16;
case ELF::R_386_GOT32:
- return i386::RequestGOTAndTransformToDelta32FromGOT;
+ return x86::RequestGOTAndTransformToDelta32FromGOT;
case ELF::R_386_GOT32X:
// TODO: Add a relaxable edge kind and update relaxation optimization.
- return i386::RequestGOTAndTransformToDelta32FromGOT;
+ return x86::RequestGOTAndTransformToDelta32FromGOT;
case ELF::R_386_GOTPC:
- return i386::Delta32;
+ return x86::Delta32;
case ELF::R_386_GOTOFF:
- return i386::Delta32FromGOT;
+ return x86::Delta32FromGOT;
case ELF::R_386_PLT32:
- return i386::BranchPCRel32;
+ return x86::BranchPCRel32;
}
return make_error<JITLinkError>(
- "In " + G->getName() + ": Unsupported i386 relocation type " +
+ "In " + G->getName() + ": Unsupported x86 relocation type " +
object::getELFRelocationTypeName(ELF::EM_386, Type));
}
Error addRelocations() override {
LLVM_DEBUG(dbgs() << "Adding relocations\n");
using Base = ELFLinkGraphBuilder<ELFT>;
- using Self = ELFLinkGraphBuilder_i386;
+ using Self = ELFLinkGraphBuilder_x86;
for (const auto &RelSect : Base::Sections) {
// Validate the section to read relocation entries from.
if (RelSect.sh_type == ELF::SHT_RELA)
return make_error<StringError>(
- "No SHT_RELA in valid i386 ELF object files",
+ "No SHT_RELA in valid x86 ELF object files",
inconvertibleErrorCode());
if (Error Err = Base::forEachRelRelocation(RelSect, this,
@@ -187,7 +187,7 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
Base::GraphSymbols.size()),
inconvertibleErrorCode());
- Expected<i386::EdgeKind_i386> Kind = getRelocationKind(ELFReloc);
+ Expected<x86::EdgeKind_x86> Kind = getRelocationKind(ELFReloc);
if (!Kind)
return Kind.takeError();
@@ -195,21 +195,21 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
int64_t Addend = 0;
switch (*Kind) {
- case i386::Pointer32:
- case i386::PCRel32:
- case i386::RequestGOTAndTransformToDelta32FromGOT:
- case i386::Delta32:
- case i386::Delta32FromGOT:
- case i386::BranchPCRel32:
- case i386::BranchPCRel32ToPtrJumpStub:
- case i386::BranchPCRel32ToPtrJumpStubBypassable: {
+ case x86::Pointer32:
+ case x86::PCRel32:
+ case x86::RequestGOTAndTransformToDelta32FromGOT:
+ case x86::Delta32:
+ case x86::Delta32FromGOT:
+ case x86::BranchPCRel32:
+ case x86::BranchPCRel32ToPtrJumpStub:
+ case x86::BranchPCRel32ToPtrJumpStubBypassable: {
const char *FixupContent = BlockToFix.getContent().data() +
(FixupAddress - BlockToFix.getAddress());
Addend = *(const support::little32_t *)FixupContent;
break;
}
- case i386::Pointer16:
- case i386::PCRel16: {
+ case x86::Pointer16:
+ case x86::PCRel16: {
const char *FixupContent = BlockToFix.getContent().data() +
(FixupAddress - BlockToFix.getAddress());
Addend = *(const support::little16_t *)FixupContent;
@@ -221,7 +221,7 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
Edge GE(*Kind, Offset, *GraphSymbol, Addend);
LLVM_DEBUG({
dbgs() << " ";
- printEdge(dbgs(), BlockToFix, GE, i386::getEdgeKindName(*Kind));
+ printEdge(dbgs(), BlockToFix, GE, x86::getEdgeKindName(*Kind));
dbgs() << "\n";
});
@@ -230,17 +230,17 @@ class ELFLinkGraphBuilder_i386 : public ELFLinkGraphBuilder<object::ELF32LE> {
}
public:
- ELFLinkGraphBuilder_i386(StringRef FileName, const object::ELFFile<ELFT> &Obj,
- std::shared_ptr<orc::SymbolStringPool> SSP,
- Triple TT, SubtargetFeatures Features)
+ ELFLinkGraphBuilder_x86(StringRef FileName, const object::ELFFile<ELFT> &Obj,
+ std::shared_ptr<orc::SymbolStringPool> SSP, Triple TT,
+ SubtargetFeatures Features)
: ELFLinkGraphBuilder<ELFT>(Obj, std::move(SSP), std::move(TT),
std::move(Features), FileName,
- i386::getEdgeKindName) {}
+ x86::getEdgeKindName) {}
};
Expected<std::unique_ptr<LinkGraph>>
-createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer,
- std::shared_ptr<orc::SymbolStringPool> SSP) {
+createLinkGraphFromELFObject_x86(MemoryBufferRef ObjectBuffer,
+ std::shared_ptr<orc::SymbolStringPool> SSP) {
LLVM_DEBUG({
dbgs() << "Building jitlink graph for new input "
<< ObjectBuffer.getBufferIdentifier() << "...\n";
@@ -255,18 +255,18 @@ createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer,
return Features.takeError();
assert((*ELFObj)->getArch() == Triple::x86 &&
- "Only i386 (little endian) is supported for now");
+ "Only x86 (little endian) is supported for now");
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF32LE>>(**ELFObj);
- return ELFLinkGraphBuilder_i386((*ELFObj)->getFileName(),
- ELFObjFile.getELFFile(), std::move(SSP),
- (*ELFObj)->makeTriple(), std::move(*Features))
+ return ELFLinkGraphBuilder_x86((*ELFObj)->getFileName(),
+ ELFObjFile.getELFFile(), std::move(SSP),
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
-void link_ELF_i386(std::unique_ptr<LinkGraph> G,
- std::unique_ptr<JITLinkContext> Ctx) {
+void link_ELF_x86(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
PassConfiguration Config;
const Triple &TT = G->getTargetTriple();
if (Ctx->shouldAddDefaultTargetPasses(TT)) {
@@ -276,15 +276,15 @@ void link_ELF_i386(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(markAllSymbolsLive);
// Add an in-place GOT and PLT build pass.
- Config.PostPrunePasses.push_back(buildTables_ELF_i386);
+ Config.PostPrunePasses.push_back(buildTables_ELF_x86);
// Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(i386::optimizeGOTAndStubAccesses);
+ Config.PreFixupPasses.push_back(x86::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
- ELFJITLinker_i386::link(std::move(Ctx), std::move(G), std::move(Config));
+ ELFJITLinker_x86::link(std::move(Ctx), std::move(G), std::move(Config));
}
} // namespace llvm::jitlink
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index e1209e1e95496..355f44d589a2d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -15,8 +15,8 @@
#include "llvm/ExecutionEngine/JITLink/MachO.h"
#include "llvm/ExecutionEngine/JITLink/XCOFF.h"
#include "llvm/ExecutionEngine/JITLink/aarch64.h"
-#include "llvm/ExecutionEngine/JITLink/i386.h"
#include "llvm/ExecutionEngine/JITLink/loongarch.h"
+#include "llvm/ExecutionEngine/JITLink/x86.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Support/raw_ostream.h"
@@ -466,7 +466,7 @@ AnonymousPointerCreator getAnonymousPointerCreator(const Triple &TT) {
case Triple::x86_64:
return x86_64::createAnonymousPointer;
case Triple::x86:
- return i386::createAnonymousPointer;
+ return x86::createAnonymousPointer;
case Triple::loongarch32:
case Triple::loongarch64:
return loongarch::createAnonymousPointer;
@@ -482,7 +482,7 @@ PointerJumpStubCreator getPointerJumpStubCreator(const Triple &TT) {
case Triple::x86_64:
return x86_64::createAnonymousPointerJumpStub;
case Triple::x86:
- return i386::createAnonymousPointerJumpStub;
+ return x86::createAnonymousPointerJumpStub;
case Triple::loongarch32:
case Triple::loongarch64:
return loongarch::createAnonymousPointerJumpStub;
diff --git a/llvm/lib/ExecutionEngine/JITLink/i386.cpp b/llvm/lib/ExecutionEngine/JITLink/x86.cpp
similarity index 87%
rename from llvm/lib/ExecutionEngine/JITLink/i386.cpp
rename to llvm/lib/ExecutionEngine/JITLink/x86.cpp
index f714716fb353d..2743d4614743d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86.cpp
@@ -1,4 +1,4 @@
-//===---- i386.cpp - Generic JITLink i386 edge kinds, utilities -----===//
+//===-------- x86.cpp - Generic JITLink x86 edge kinds, utilities ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,15 +6,15 @@
//
//===----------------------------------------------------------------------===//
//
-// Generic utilities for graphs representing i386 objects.
+// Generic utilities for graphs representing x86 objects.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ExecutionEngine/JITLink/i386.h"
+#include "llvm/ExecutionEngine/JITLink/x86.h"
#define DEBUG_TYPE "jitlink"
-namespace llvm::jitlink::i386 {
+namespace llvm::jitlink::x86 {
const char *getEdgeKindName(Edge::Kind K) {
switch (K) {
@@ -53,7 +53,7 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
for (auto *B : G.blocks())
for (auto &E : B->edges()) {
- if (E.getKind() == i386::BranchPCRel32ToPtrJumpStubBypassable) {
+ if (E.getKind() == x86::BranchPCRel32ToPtrJumpStubBypassable) {
auto &StubBlock = E.getTarget().getBlock();
assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) &&
"Stub block should be stub sized");
@@ -72,7 +72,7 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
int64_t Displacement = TargetAddr - EdgeAddr + 4;
if (isInt<32>(Displacement)) {
- E.setKind(i386::BranchPCRel32);
+ E.setKind(x86::BranchPCRel32);
E.setTarget(GOTTarget);
LLVM_DEBUG({
dbgs() << " Replaced stub branch with direct branch:\n ";
@@ -86,4 +86,4 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
return Error::success();
}
-} // namespace llvm::jitlink::i386
+} // namespace llvm::jitlink::x86
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_external_to_absolute_conversion.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_external_to_absolute_conversion.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_external_to_absolute_conversion.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_external_to_absolute_conversion.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_absolute_relocations_16.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_absolute_relocations_16.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_absolute_relocations_16.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_absolute_relocations_16.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_absolute_relocations_32.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_absolute_relocations_32.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_absolute_relocations_32.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_absolute_relocations_32.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_minimal.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_minimal.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_minimal.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_minimal.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_pc_relative_relocations_32.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_pc_relative_relocations_32.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_pc_relative_relocations_32.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_pc_relative_relocations_32.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_small_pic_relocations_got.s
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_got.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_small_pic_relocations_got.s
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_small_pic_relocations_plt.s
similarity index 95%
rename from llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s
rename to llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_small_pic_relocations_plt.s
index ce565ca2fcdda..e74ee7159f24b 100644
--- a/llvm/test/ExecutionEngine/JITLink/i386/ELF_i386_small_pic_relocations_plt.s
+++ b/llvm/test/ExecutionEngine/JITLink/x86/ELF_x86_small_pic_relocations_plt.s
@@ -21,7 +21,7 @@ main:
# This produces a Branch32 edge that is resolved like a regular PCRel32
# (no PLT entry created).
#
-# NOTE - For ELF/i386 we always optimize away the PLT calls as the
+# NOTE - For ELF/x86 we always optimize away the PLT calls as the
# displacement between the target address and the edge address always
# fits in an int32_t. Regardless, we always create the PLT stub and GOT entry
# for position independent code, first, as there may be future use-cases
diff --git a/llvm/test/ExecutionEngine/JITLink/i386/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/x86/lit.local.cfg
similarity index 100%
rename from llvm/test/ExecutionEngine/JITLink/i386/lit.local.cfg
rename to llvm/test/ExecutionEngine/JITLink/x86/lit.local.cfg
>From c16297cd3f0ed9d036e9cf16fb6885aa3c72d5d3 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 7 May 2025 22:03:33 -0400
Subject: [PATCH 079/115] [CUDA][HIP] Fix host/device attribute of builtin
(#138162)
When a builtin function is passed a pointer with a different address
space, clang creates an overloaded builtin function but does not copy
the host/device attribute. This causes an error when the builtin is
called from device functions, since CUDA/HIP relies on the host/device
attribute to treat a builtin function as callable on both the host and
device sides.
Fixed by copying the host/device attribute of the original builtin
function to the created overloaded builtin function.
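As a minimal illustration (this mirrors the new SemaCUDA test below; the
builtin and the address_space attribute come from that test, not a new API):
__global__ void kernel() {
  __attribute__((address_space(0))) void *mem_ptr;
  // Passing a pointer whose address space differs from the builtin's
  // signature makes Sema synthesize an overloaded builtin declaration.
  // Before this fix that clone lacked __host__/__device__, so the call was
  // rejected inside device code; after the fix the attributes are copied.
  (void)__builtin_amdgcn_is_shared(mem_ptr);
}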
---
clang/lib/Sema/SemaExpr.cpp | 8 +++++
clang/test/SemaCUDA/overloaded-builtin.cu | 36 +++++++++++++++++++++++
2 files changed, 44 insertions(+)
create mode 100644 clang/test/SemaCUDA/overloaded-builtin.cu
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index c3ef5a70d5f6d..57135adf714ce 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6362,6 +6362,14 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context,
Params.push_back(Parm);
}
OverloadDecl->setParams(Params);
+ // We cannot merge host/device attributes of redeclarations. They have to
+ // be consistent when created.
+ if (Sema->LangOpts.CUDA) {
+ if (FDecl->hasAttr<CUDAHostAttr>())
+ OverloadDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ if (FDecl->hasAttr<CUDADeviceAttr>())
+ OverloadDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ }
Sema->mergeDeclAttributes(OverloadDecl, FDecl);
return OverloadDecl;
}
diff --git a/clang/test/SemaCUDA/overloaded-builtin.cu b/clang/test/SemaCUDA/overloaded-builtin.cu
new file mode 100644
index 0000000000000..c60c27e7f8627
--- /dev/null
+++ b/clang/test/SemaCUDA/overloaded-builtin.cu
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -fsyntax-only -verify=host -xhip %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fsyntax-only -fcuda-is-device -verify=dev -xhip %s
+
+// dev-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+__global__ void kernel() {
+ __attribute__((address_space(0))) void *mem_ptr;
+ (void)__builtin_amdgcn_is_shared(mem_ptr);
+}
+
+template<typename T>
+__global__ void template_kernel(T *p) {
+ __attribute__((address_space(0))) void *mem_ptr;
+ (void)__builtin_amdgcn_is_shared(mem_ptr);
+}
+
+void hfun() {
+ __attribute__((address_space(0))) void *mem_ptr;
+ (void)__builtin_amdgcn_is_shared(mem_ptr); // host-error {{reference to __device__ function '__builtin_amdgcn_is_shared' in __host__ function}}
+}
+
+template<typename T>
+void template_hfun(T *p) {
+ __attribute__((address_space(0))) void *mem_ptr;
+ (void)__builtin_amdgcn_is_shared(mem_ptr); // host-error {{reference to __device__ function '__builtin_amdgcn_is_shared' in __host__ function}}
+}
+
+
+int main() {
+ int *p;
+ kernel<<<1,1>>>();
+ template_kernel<<<1,1>>>(p);
+ template_hfun(p); // host-note {{called by 'main'}}
+}
>From 57bc9f000600773a3e0a272e1154df87581c7b57 Mon Sep 17 00:00:00 2001
From: Lang Hames <lhames at apple.com>
Date: Thu, 8 May 2025 12:12:26 +1000
Subject: [PATCH 080/115] [JITLink][x86] Update StubsTest unit test for rename
in b972164f381.
---
llvm/lib/ExecutionEngine/JITLink/x86.cpp | 4 ++--
llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp | 8 ++++----
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86.cpp b/llvm/lib/ExecutionEngine/JITLink/x86.cpp
index 2743d4614743d..a07cdd6398a77 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86.cpp
@@ -53,7 +53,7 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
for (auto *B : G.blocks())
for (auto &E : B->edges()) {
- if (E.getKind() == x86::BranchPCRel32ToPtrJumpStubBypassable) {
+ if (E.getKind() == BranchPCRel32ToPtrJumpStubBypassable) {
auto &StubBlock = E.getTarget().getBlock();
assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) &&
"Stub block should be stub sized");
@@ -72,7 +72,7 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
int64_t Displacement = TargetAddr - EdgeAddr + 4;
if (isInt<32>(Displacement)) {
- E.setKind(x86::BranchPCRel32);
+ E.setKind(BranchPCRel32);
E.setTarget(GOTTarget);
LLVM_DEBUG({
dbgs() << " Replaced stub branch with direct branch:\n ";
diff --git a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp
index 8a53d0a560ba3..643ea6754f2d1 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp
+++ b/llvm/unittests/ExecutionEngine/JITLink/StubsTests.cpp
@@ -9,8 +9,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/aarch64.h"
-#include "llvm/ExecutionEngine/JITLink/i386.h"
#include "llvm/ExecutionEngine/JITLink/loongarch.h"
+#include "llvm/ExecutionEngine/JITLink/x86.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
#include "llvm/Support/Memory.h"
@@ -96,19 +96,19 @@ TEST(StubsTest, StubsGeneration_aarch64) {
ArrayRef<char>(PointerJumpStubContent));
}
-TEST(StubsTest, StubsGeneration_i386) {
+TEST(StubsTest, StubsGeneration_x86) {
const char PointerJumpStubContent[6] = {
static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00};
LinkGraph G("foo", std::make_shared<orc::SymbolStringPool>(),
Triple("i386-unknown-linux-gnu"), SubtargetFeatures(),
getGenericEdgeKindName);
- auto [PointerSym, StubSym] = GenerateStub(G, 4U, i386::Pointer32);
+ auto [PointerSym, StubSym] = GenerateStub(G, 4U, x86::Pointer32);
EXPECT_EQ(std::distance(StubSym.getBlock().edges().begin(),
StubSym.getBlock().edges().end()),
1U);
auto &JumpEdge = *StubSym.getBlock().edges().begin();
- EXPECT_EQ(JumpEdge.getKind(), i386::Pointer32);
+ EXPECT_EQ(JumpEdge.getKind(), x86::Pointer32);
EXPECT_EQ(&JumpEdge.getTarget(), &PointerSym);
EXPECT_EQ(StubSym.getBlock().getContent(),
ArrayRef<char>(PointerJumpStubContent));
>From eebb50afaf27961b21847950179febdd20a98866 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
Date: Thu, 8 May 2025 02:16:04 +0000
Subject: [PATCH 081/115] [gn build] Port b972164f3813
---
.../gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
index 858dc6070cc63..d8dfcbd3bac2d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn
@@ -29,10 +29,10 @@ static_library("JITLink") {
"ELFLinkGraphBuilder.cpp",
"ELF_aarch32.cpp",
"ELF_aarch64.cpp",
- "ELF_i386.cpp",
"ELF_loongarch.cpp",
"ELF_ppc64.cpp",
"ELF_riscv.cpp",
+ "ELF_x86.cpp",
"ELF_x86_64.cpp",
"JITLink.cpp",
"JITLinkGeneric.cpp",
@@ -46,10 +46,10 @@ static_library("JITLink") {
"XCOFF_ppc64.cpp",
"aarch32.cpp",
"aarch64.cpp",
- "i386.cpp",
"loongarch.cpp",
"ppc64.cpp",
"riscv.cpp",
+ "x86.cpp",
"x86_64.cpp",
]
}
>From dc28f9d087324f77db81e7192648a17ebf036125 Mon Sep 17 00:00:00 2001
From: Timm Baeder <tbaeder at redhat.com>
Date: Thu, 8 May 2025 05:22:11 +0200
Subject: [PATCH 082/115] [clang][ExprConstant] Bail out on invalid lambda
capture inits (#138832)
Fixes https://github.com/llvm/llvm-project/issues/138824
---
clang/docs/ReleaseNotes.rst | 2 ++
clang/lib/AST/ByteCode/Compiler.cpp | 5 ++---
clang/lib/AST/ExprConstant.cpp | 2 +-
clang/test/SemaCXX/constant-expression-cxx11.cpp | 9 +++++++++
4 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4c25d6d4d515a..0d8c365609c36 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -564,6 +564,8 @@ Bug Fixes in This Version
the invalid attribute location appropriately. (#GH137861)
- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an
``#include`` directive. (#GH138094)
+- Fixed a crash during constant evaluation involving invalid lambda captures
+ (#GH138832)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index ae6574cf99159..3cc55c7052d23 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -2932,10 +2932,9 @@ bool Compiler<Emitter>::VisitLambdaExpr(const LambdaExpr *E) {
// record with their initializers.
for (const Record::Field &F : R->fields()) {
const Expr *Init = *CaptureInitIt;
- ++CaptureInitIt;
-
- if (!Init)
+ if (!Init || Init->containsErrors())
continue;
+ ++CaptureInitIt;
if (std::optional<PrimType> T = classify(Init)) {
if (!this->visit(Init))
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e5950f461e4b2..500d43accb082 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11038,7 +11038,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
// If there is no initializer, either this is a VLA or an error has
// occurred.
- if (!CurFieldInit)
+ if (!CurFieldInit || CurFieldInit->containsErrors())
return Error(E);
LValue Subobject = This;
diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp
index dc8f4bf1666ee..0a135654fab18 100644
--- a/clang/test/SemaCXX/constant-expression-cxx11.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp
@@ -2598,3 +2598,12 @@ void foo() {
constexpr S s[2] = { }; // expected-error {{constexpr variable 's' must be initialized by a constant expression}}
}
}
+
+namespace DoubleCapture {
+ int DC() {
+ int a = 1000;
+ static auto f =
+ [a, &a] { // expected-error {{'a' can appear only once in a capture list}}
+ };
+ }
+}
>From 92d3029fa4a9c6ce21c50590e57ae834ae3db3bc Mon Sep 17 00:00:00 2001
From: cmtice <cmtice at google.com>
Date: Wed, 7 May 2025 20:39:37 -0700
Subject: [PATCH 083/115] [LLDB] Fix GetIndexOfChildMemberWithName to handle
anonymous structs. (#138487)
When handling anonymous structs, GetIndexOfChildMemberWithName needs to
add the number of non-empty base classes to the child index to get the
correct index. It was not doing so; this patch fixes that.
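A reduced sketch of the failing shape (simplified from the new test's
main.cpp; the names mirror that test):
struct A { int x = 1; };
struct DerivedB : A {   // one non-empty base class
  struct {              // anonymous struct member
    int w = 14;
  };
} derb;
// Resolving "derb.w" has to descend into the unnamed field of DerivedB.
// That field's child index is (number of non-empty bases) + its field
// position, i.e. 1 here; the old code pushed the raw field index (0) and
// therefore resolved the wrong child.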
---
.../TypeSystem/Clang/TypeSystemClang.cpp | 4 +-
.../lang/cpp/type_lookup_anon_struct/Makefile | 3 ++
.../TestCppTypeLookupAnonStruct.py | 43 +++++++++++++++
.../lang/cpp/type_lookup_anon_struct/main.cpp | 52 +++++++++++++++++++
4 files changed, 101 insertions(+), 1 deletion(-)
create mode 100644 lldb/test/API/lang/cpp/type_lookup_anon_struct/Makefile
create mode 100644 lldb/test/API/lang/cpp/type_lookup_anon_struct/TestCppTypeLookupAnonStruct.py
create mode 100644 lldb/test/API/lang/cpp/type_lookup_anon_struct/main.cpp
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 45f044733c0ff..3b286885cc37f 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -6743,7 +6743,9 @@ size_t TypeSystemClang::GetIndexOfChildMemberWithName(
if (field_name.empty()) {
CompilerType field_type = GetType(field->getType());
std::vector<uint32_t> save_indices = child_indexes;
- child_indexes.push_back(child_idx);
+ child_indexes.push_back(
+ child_idx + TypeSystemClang::GetNumBaseClasses(
+ cxx_record_decl, omit_empty_base_classes));
if (field_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes))
return child_indexes.size();
diff --git a/lldb/test/API/lang/cpp/type_lookup_anon_struct/Makefile b/lldb/test/API/lang/cpp/type_lookup_anon_struct/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/lang/cpp/type_lookup_anon_struct/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/lang/cpp/type_lookup_anon_struct/TestCppTypeLookupAnonStruct.py b/lldb/test/API/lang/cpp/type_lookup_anon_struct/TestCppTypeLookupAnonStruct.py
new file mode 100644
index 0000000000000..265a96f7da152
--- /dev/null
+++ b/lldb/test/API/lang/cpp/type_lookup_anon_struct/TestCppTypeLookupAnonStruct.py
@@ -0,0 +1,43 @@
+"""
+Test that we properly print multiple types.
+"""
+
+import lldb
+import lldbsuite.test.lldbutil as lldbutil
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import decorators
+
+
+class TestTypeLookupAnonStruct(TestBase):
+ def test_lookup_anon_struct(self):
+ self.build()
+ lldbutil.run_to_source_breakpoint(
+ self, "// Set breakpoint here", lldb.SBFileSpec("main.cpp")
+ )
+
+ self.expect_var_path("unnamed_derived.y", value="2")
+ self.expect_var_path("unnamed_derived.z", value="13")
+ self.expect(
+ 'frame variable "derb.x"',
+ error=True,
+ substrs=['"x" is not a member of "(DerivedB) derb"']
+ )
+ self.expect(
+ 'frame variable "derb.y"',
+ error=True,
+ substrs=['"y" is not a member of "(DerivedB) derb"']
+ )
+ self.expect_var_path("derb.w", value="14")
+ self.expect_var_path("derb.k", value="15")
+ self.expect_var_path("derb.a.x", value="1")
+ self.expect_var_path("derb.a.y", value="2")
+
+ self.expect_var_path("multi1.m", value="16")
+ self.expect_var_path("multi1.y", value="30")
+
+ self.expect_var_path("multi2.i", value="42")
+ self.expect_var_path("multi2.w", value="23")
+ self.expect_var_path("multi2.a.x", value="1")
+ self.expect_var_path("multi2.a.y", value="2")
+ self.expect_var_path("multi2.y", value="2")
+ self.expect_var_path("multi2.n", value="7")
diff --git a/lldb/test/API/lang/cpp/type_lookup_anon_struct/main.cpp b/lldb/test/API/lang/cpp/type_lookup_anon_struct/main.cpp
new file mode 100644
index 0000000000000..a9288e6466e74
--- /dev/null
+++ b/lldb/test/API/lang/cpp/type_lookup_anon_struct/main.cpp
@@ -0,0 +1,52 @@
+int main(int argc, char **argv) {
+ struct A {
+ struct {
+ int x = 1;
+ };
+ int y = 2;
+ } a;
+
+ struct B {
+ // Anonymous struct inherits another struct.
+ struct : public A {
+ int z = 3;
+ };
+ int w = 4;
+ A a;
+ } b;
+
+
+ struct EmptyBase {
+ };
+
+ struct : public A {
+ struct {
+ int z = 13;
+ };
+ } unnamed_derived;
+
+ struct DerivedB : public B {
+ struct {
+ // `w` in anonymous struct shadows `w` from `B`.
+ int w = 14;
+ int k = 15;
+ };
+ } derb;
+
+ struct MultiBase : public EmptyBase, public A {
+ struct {
+ int m = 16;
+ int y = 30;
+ };
+ } multi1;
+
+ struct MB2 : public B, EmptyBase, public A {
+ int i = 42;
+ struct {
+ int w = 23;
+ int n = 7;
+ };
+ } multi2;
+
+ return 0; // Set breakpoint here
+}
>From efce7a169e58ec8b27d266ec4dfb851f85a7c6c2 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 7 May 2025 20:40:04 -0700
Subject: [PATCH 084/115] [lldb-dap] Re-enable the lldb-dap tests (#138791)
Re-enable the lldb-dap tests. We've spent the last week improving the
reliability of the test suite, and the tests now pass reliably on macOS
and Linux when run locally. Let's see how things fare on the bots.
---
lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py | 1 -
lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py | 1 +
.../tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py | 3 +--
.../API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py | 3 +--
lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py | 2 --
.../test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py | 3 +--
lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py | 2 --
lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py | 3 +--
lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py | 3 +--
9 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
index 741c011a3d692..b5569642f9d34 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
@@ -24,7 +24,6 @@ def spawn_and_wait(program, delay):
process.wait()
-@skip
class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
def set_and_hit_breakpoint(self, continueToExit=True):
self.dap_server.wait_for_stopped()
diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
index 7250e67ebcd8c..7c2b540195d15 100644
--- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attachByPortNum.py
@@ -18,6 +18,7 @@
import socket
+@skip("https://github.com/llvm/llvm-project/issues/138803")
class TestDAP_attachByPortNum(lldbdap_testcase.DAPTestCaseBase):
default_timeout = 20
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py
index 4a99cacc761a3..1058157e2c668 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_breakpointLocations.py
@@ -11,8 +11,7 @@
import lldbdap_testcase
import os
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
+
class TestDAP_breakpointLocations(lldbdap_testcase.DAPTestCaseBase):
def setUp(self):
lldbdap_testcase.DAPTestCaseBase.setUp(self)
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
index 6c6681804f250..26df2573555df 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
@@ -11,8 +11,7 @@
import lldbdap_testcase
import os
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
+
class TestDAP_setBreakpoints(lldbdap_testcase.DAPTestCaseBase):
def setUp(self):
lldbdap_testcase.DAPTestCaseBase.setUp(self)
diff --git a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
index 4aecf9a665c06..223258fbdd3dc 100644
--- a/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
+++ b/lldb/test/API/tools/lldb-dap/commands/TestDAP_commands.py
@@ -6,8 +6,6 @@
from lldbsuite.test.decorators import *
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
class TestDAP_commands(lldbdap_testcase.DAPTestCaseBase):
def test_command_directive_quiet_on_success(self):
program = self.getBuildArtifact("a.out")
diff --git a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
index ebecb349ac177..9e8ef5b289f2e 100644
--- a/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
+++ b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
@@ -10,8 +10,7 @@
import lldbdap_testcase
import os
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
+
class TestDAP_disassemble(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_disassemble(self):
diff --git a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
index 19b682dfcd22d..372a9bb75e007 100644
--- a/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
+++ b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
@@ -11,8 +11,6 @@
from lldbsuite.test.lldbtest import *
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
class TestDAP_evaluate(lldbdap_testcase.DAPTestCaseBase):
def assertEvaluate(self, expression, regex):
self.assertRegex(
diff --git a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
index c71ba871b8a22..ea43fccf016a7 100644
--- a/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
+++ b/lldb/test/API/tools/lldb-dap/memory/TestDAP_memory.py
@@ -10,8 +10,7 @@
import lldbdap_testcase
import os
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
+
class TestDAP_memory(lldbdap_testcase.DAPTestCaseBase):
def test_memory_refs_variables(self):
"""
diff --git a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
index 560207bfbb66c..3b45cdc245838 100644
--- a/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
+++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
@@ -17,8 +17,7 @@ def make_buffer_verify_dict(start_idx, count, offset=0):
verify_dict["[%i]" % (i)] = {"type": "int", "value": str(i + offset)}
return verify_dict
-# DAP tests are flakey, see https://github.com/llvm/llvm-project/issues/137660.
-@skip
+
class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
def verify_values(self, verify_dict, actual, varref_dict=None, expression=None):
if "equals" in verify_dict:
>From efd805ed5591b557d66c95c1ca11701ef7bc897d Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Wed, 7 May 2025 21:16:04 -0700
Subject: [PATCH 085/115] MC: Test quoted label
---
llvm/test/MC/AsmParser/quoted.s | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 llvm/test/MC/AsmParser/quoted.s
diff --git a/llvm/test/MC/AsmParser/quoted.s b/llvm/test/MC/AsmParser/quoted.s
new file mode 100644
index 0000000000000..16b0997827603
--- /dev/null
+++ b/llvm/test/MC/AsmParser/quoted.s
@@ -0,0 +1,15 @@
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -triple x86_64 a.s | FileCheck %s
+# RUN: not llvm-mc -triple x86_64 err.s 2>&1 | FileCheck %s --check-prefix=ERR
+
+#--- a.s
+# CHECK: .type "a b",@function
+# CHECK: "a b":
+.type "a b", @function
+"a b":
+ call "a b"
+
+#--- err.s
+ "a\":
+# ERR: 1:2: error: unterminated string constant
+# ERR: 1:2: error: unexpected token at start of statement
>From 7348d7eccbc452ed1fd9bc219c796f1214a3cc84 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 7 May 2025 21:20:01 -0700
Subject: [PATCH 086/115] [MemProf] Avoid assertion checking loop under NDEBUG
(NFC) (#138985)
Guard a loop that only exists to do assertion checking of stack ids on
memprof metadata so that it isn't compiled and executed under NDEBUG.
This is similar to how callsite metadata stack id verification is
guarded further below.
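The shape of the refactor, as a simplified sketch (Foo, checkInvariants and
apply are placeholder names, not the actual MemProf types):
#include <cassert>
struct Foo { /* ... */ };
#ifndef NDEBUG
// Assertion-only verification moves into a helper that exists only in
// asserts builds.
static void checkInvariants(const Foo &F) {
  // walk F and assert that summary and metadata stack ids line up
  (void)F;
}
#endif
void apply(Foo &F) {
#ifndef NDEBUG
  checkInvariants(F); // the whole loop vanishes in NDEBUG (release) builds
#endif
  // ... the actual cloning/transformation work ...
}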
---
.../include/llvm/Analysis/MemoryProfileInfo.h | 4 +-
.../IPO/MemProfContextDisambiguation.cpp | 77 +++++++++++--------
2 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index deb7ab134c161..86c58d1261b71 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -166,7 +166,7 @@ template <class NodeT, class IteratorT> class CallStack {
CallStackIterator begin() const;
CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }
- CallStackIterator beginAfterSharedPrefix(CallStack &Other);
+ CallStackIterator beginAfterSharedPrefix(const CallStack &Other);
uint64_t back() const;
private:
@@ -204,7 +204,7 @@ CallStack<NodeT, IteratorT>::begin() const {
template <class NodeT, class IteratorT>
typename CallStack<NodeT, IteratorT>::CallStackIterator
-CallStack<NodeT, IteratorT>::beginAfterSharedPrefix(CallStack &Other) {
+CallStack<NodeT, IteratorT>::beginAfterSharedPrefix(const CallStack &Other) {
CallStackIterator Cur = begin();
for (CallStackIterator OtherCur = Other.begin();
Cur != end() && OtherCur != Other.end(); ++Cur, ++OtherCur)
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index ec158afb76519..4b2683dc6c2a7 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -5050,6 +5050,45 @@ bool MemProfContextDisambiguation::initializeIndirectCallPromotionInfo(
return true;
}
+#ifndef NDEBUG
+// Sanity check that the MIB stack ids match between the summary and
+// instruction metadata.
+static void checkAllocContextIds(
+ const AllocInfo &AllocNode, const MDNode *MemProfMD,
+ const CallStack<MDNode, MDNode::op_iterator> &CallsiteContext,
+ const ModuleSummaryIndex *ImportSummary) {
+ auto MIBIter = AllocNode.MIBs.begin();
+ for (auto &MDOp : MemProfMD->operands()) {
+ assert(MIBIter != AllocNode.MIBs.end());
+ auto StackIdIndexIter = MIBIter->StackIdIndices.begin();
+ auto *MIBMD = cast<const MDNode>(MDOp);
+ MDNode *StackMDNode = getMIBStackNode(MIBMD);
+ assert(StackMDNode);
+ CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
+ auto ContextIterBegin =
+ StackContext.beginAfterSharedPrefix(CallsiteContext);
+ // Skip the checking on the first iteration.
+ uint64_t LastStackContextId =
+ (ContextIterBegin != StackContext.end() && *ContextIterBegin == 0) ? 1
+ : 0;
+ for (auto ContextIter = ContextIterBegin; ContextIter != StackContext.end();
+ ++ContextIter) {
+ // If this is a direct recursion, simply skip the duplicate
+ // entries, to be consistent with how the summary ids were
+ // generated during ModuleSummaryAnalysis.
+ if (LastStackContextId == *ContextIter)
+ continue;
+ LastStackContextId = *ContextIter;
+ assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
+ assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
+ *ContextIter);
+ StackIdIndexIter++;
+ }
+ MIBIter++;
+ }
+}
+#endif
+
bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(ImportSummary);
bool Changed = false;
@@ -5242,40 +5281,10 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
assert(AI != FS->allocs().end());
auto &AllocNode = *(AI++);
- // Sanity check that the MIB stack ids match between the summary and
- // instruction metadata.
- auto MIBIter = AllocNode.MIBs.begin();
- for (auto &MDOp : MemProfMD->operands()) {
- assert(MIBIter != AllocNode.MIBs.end());
- LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
- MIBIter->StackIdIndices.begin();
- auto *MIBMD = cast<const MDNode>(MDOp);
- MDNode *StackMDNode = getMIBStackNode(MIBMD);
- assert(StackMDNode);
- CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
- auto ContextIterBegin =
- StackContext.beginAfterSharedPrefix(CallsiteContext);
- // Skip the checking on the first iteration.
- uint64_t LastStackContextId =
- (ContextIterBegin != StackContext.end() &&
- *ContextIterBegin == 0)
- ? 1
- : 0;
- for (auto ContextIter = ContextIterBegin;
- ContextIter != StackContext.end(); ++ContextIter) {
- // If this is a direct recursion, simply skip the duplicate
- // entries, to be consistent with how the summary ids were
- // generated during ModuleSummaryAnalysis.
- if (LastStackContextId == *ContextIter)
- continue;
- LastStackContextId = *ContextIter;
- assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
- assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
- *ContextIter);
- StackIdIndexIter++;
- }
- MIBIter++;
- }
+#ifndef NDEBUG
+ checkAllocContextIds(AllocNode, MemProfMD, CallsiteContext,
+ ImportSummary);
+#endif
// Perform cloning if not yet done.
CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
>From 20d6375796073f6a0f0ea6abe05ce454a3d617ff Mon Sep 17 00:00:00 2001
From: Prabhu Rajasekaran <prabhukr at google.com>
Date: Wed, 7 May 2025 21:42:01 -0700
Subject: [PATCH 087/115] [clang] Handle CC attrs for UEFI (#138935)
UEFI's default ABI is MS ABI. Handle the calling convention attributes
accordingly.
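Concretely (an illustrative sketch based on the updated ms_abi.c and
sysv_abi.c tests; f and g are made-up names):
// On x86_64-uefi the default calling convention already is the MS ABI, so
// ms_abi is accepted as the default (CC_C) and sysv_abi selects the System V
// convention - the same treatment x86_64 Windows targets get.
void __attribute__((ms_abi))   f(void); // default convention on UEFI/Windows
void __attribute__((sysv_abi)) g(void); // lowered with the SysV convention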
---
clang/lib/Basic/Targets/X86.h | 1 +
clang/lib/CodeGen/CGCall.cpp | 13 ++++++++-----
clang/lib/Sema/SemaChecking.cpp | 10 +++++-----
clang/lib/Sema/SemaDeclAttr.cpp | 9 +++++----
clang/test/CodeGen/ms_abi.c | 1 +
clang/test/CodeGen/sysv_abi.c | 2 ++
clang/test/Sema/callingconv-ms_abi.c | 1 +
clang/test/Sema/varargs-win64.c | 1 +
8 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 2f6fb33a7b597..780385f9c9bc5 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -866,6 +866,7 @@ class LLVM_LIBRARY_VISIBILITY UEFIX86_64TargetInfo
switch (CC) {
case CC_C:
case CC_Win64:
+ case CC_X86_64SysV:
return CCCR_OK;
default:
return CCCR_Warning;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f1c7699d27c3..9dfd25f9a8d43 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -254,7 +254,7 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
}
static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
- bool IsWindows) {
+ bool IsTargetDefaultMSABI) {
// Set the appropriate calling convention for the Function.
if (D->hasAttr<StdCallAttr>())
return CC_X86StdCall;
@@ -290,10 +290,10 @@ static CallingConv getCallingConventionForDecl(const ObjCMethodDecl *D,
return CC_IntelOclBicc;
if (D->hasAttr<MSABIAttr>())
- return IsWindows ? CC_C : CC_Win64;
+ return IsTargetDefaultMSABI ? CC_C : CC_Win64;
if (D->hasAttr<SysVABIAttr>())
- return IsWindows ? CC_X86_64SysV : CC_C;
+ return IsTargetDefaultMSABI ? CC_X86_64SysV : CC_C;
if (D->hasAttr<PreserveMostAttr>())
return CC_PreserveMost;
@@ -581,8 +581,11 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD,
}
FunctionType::ExtInfo einfo;
- bool IsWindows = getContext().getTargetInfo().getTriple().isOSWindows();
- einfo = einfo.withCallingConv(getCallingConventionForDecl(MD, IsWindows));
+ bool IsTargetDefaultMSABI =
+ getContext().getTargetInfo().getTriple().isOSWindows() ||
+ getContext().getTargetInfo().getTriple().isUEFI();
+ einfo = einfo.withCallingConv(
+ getCallingConventionForDecl(MD, IsTargetDefaultMSABI));
if (getContext().getLangOpts().ObjCAutoRefCount &&
MD->hasAttr<NSReturnsRetainedAttr>())
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 5dd231baa67d9..5a0cec3d112db 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -4868,7 +4868,7 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) {
bool IsX64 = TT.getArch() == llvm::Triple::x86_64;
bool IsAArch64 = (TT.getArch() == llvm::Triple::aarch64 ||
TT.getArch() == llvm::Triple::aarch64_32);
- bool IsWindows = TT.isOSWindows();
+ bool IsWindowsOrUEFI = TT.isOSWindows() || TT.isUEFI();
bool IsMSVAStart = BuiltinID == Builtin::BI__builtin_ms_va_start;
if (IsX64 || IsAArch64) {
CallingConv CC = CC_C;
@@ -4876,7 +4876,7 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) {
CC = FD->getType()->castAs<FunctionType>()->getCallConv();
if (IsMSVAStart) {
// Don't allow this in System V ABI functions.
- if (CC == CC_X86_64SysV || (!IsWindows && CC != CC_Win64))
+ if (CC == CC_X86_64SysV || (!IsWindowsOrUEFI && CC != CC_Win64))
return S.Diag(Fn->getBeginLoc(),
diag::err_ms_va_start_used_in_sysv_function);
} else {
@@ -4884,11 +4884,11 @@ static bool checkVAStartABI(Sema &S, unsigned BuiltinID, Expr *Fn) {
// On x64 Windows, don't allow this in System V ABI functions.
// (Yes, that means there's no corresponding way to support variadic
// System V ABI functions on Windows.)
- if ((IsWindows && CC == CC_X86_64SysV) ||
- (!IsWindows && CC == CC_Win64))
+ if ((IsWindowsOrUEFI && CC == CC_X86_64SysV) ||
+ (!IsWindowsOrUEFI && CC == CC_Win64))
return S.Diag(Fn->getBeginLoc(),
diag::err_va_start_used_in_wrong_abi_function)
- << !IsWindows;
+ << !IsWindowsOrUEFI;
}
return false;
}
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index bfb3ee9dcbd16..6b3824c27d470 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5397,6 +5397,9 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
}
}
+ bool IsTargetDefaultMSABI =
+ Context.getTargetInfo().getTriple().isOSWindows() ||
+ Context.getTargetInfo().getTriple().isUEFI();
// TODO: diagnose uses of these conventions on the wrong target.
switch (Attrs.getKind()) {
case ParsedAttr::AT_CDecl:
@@ -5436,12 +5439,10 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
CC = CC_X86RegCall;
break;
case ParsedAttr::AT_MSABI:
- CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_C :
- CC_Win64;
+ CC = IsTargetDefaultMSABI ? CC_C : CC_Win64;
break;
case ParsedAttr::AT_SysVABI:
- CC = Context.getTargetInfo().getTriple().isOSWindows() ? CC_X86_64SysV :
- CC_C;
+ CC = IsTargetDefaultMSABI ? CC_X86_64SysV : CC_C;
break;
case ParsedAttr::AT_Pcs: {
StringRef StrRef;
diff --git a/clang/test/CodeGen/ms_abi.c b/clang/test/CodeGen/ms_abi.c
index 0fe1741cf9a0a..528e546f315d5 100644
--- a/clang/test/CodeGen/ms_abi.c
+++ b/clang/test/CodeGen/ms_abi.c
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 -emit-llvm < %s | FileCheck -check-prefix=FREEBSD %s
// RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s
+// RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm < %s | FileCheck -check-prefix=WIN64 %s
struct foo {
int x;
diff --git a/clang/test/CodeGen/sysv_abi.c b/clang/test/CodeGen/sysv_abi.c
index 0df8f1194e605..29ea819c2aa26 100644
--- a/clang/test/CodeGen/sysv_abi.c
+++ b/clang/test/CodeGen/sysv_abi.c
@@ -1,7 +1,9 @@
// RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX
// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX
+// RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm -target-cpu skylake-avx512 < %s | FileCheck %s --check-prefixes=CHECK,AVX
// RUN: %clang_cc1 -triple x86_64-pc-win32 -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX
// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX
+// RUN: %clang_cc1 -triple x86_64-uefi -emit-llvm < %s | FileCheck %s --check-prefixes=CHECK,NOAVX
#define SYSV_CC __attribute__((sysv_abi))
diff --git a/clang/test/Sema/callingconv-ms_abi.c b/clang/test/Sema/callingconv-ms_abi.c
index 9b766f6868629..d68812db9b1f7 100644
--- a/clang/test/Sema/callingconv-ms_abi.c
+++ b/clang/test/Sema/callingconv-ms_abi.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-pc-win32 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-uefi %s
void __attribute__((ms_abi)) foo(void);
void (*pfoo)(void) = foo;
diff --git a/clang/test/Sema/varargs-win64.c b/clang/test/Sema/varargs-win64.c
index 06d1c7f246b96..8b664eb8dfdf1 100644
--- a/clang/test/Sema/varargs-win64.c
+++ b/clang/test/Sema/varargs-win64.c
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-pc-win32
+// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-uefi
void __attribute__((sysv_abi)) foo(int a, ...) {
__builtin_va_list ap;
>From 98d26b8f67e6abdac24591138f07dc34e7f0e36e Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Wed, 7 May 2025 22:14:21 -0700
Subject: [PATCH 088/115] [libc][bazel] Re-enable memcpy prefetching on x86.
(#138945)
It was re-enabled downstream after further performance analysis, so we
can revert c65ed964657c93d51f3e05de9e0609419768a143, effectively
re-landing the change.
---
.../bazel/llvm-project-overlay/libc/libc_configure_options.bzl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index f65da9e98226b..96d7fa86e9ddf 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -24,7 +24,7 @@ LIBC_CONFIGURE_OPTIONS = [
# Documentation in libc/src/string/memory_utils/...
# "LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY",
# "LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE",
- # "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING",
+ "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING",
"LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING",
# Documentation in libc/docs/dev/printf_behavior.rst
>From 28521368d74a7ea264ce7cf2f51e48f92c4f53a5 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht at google.com>
Date: Thu, 8 May 2025 00:16:40 -0500
Subject: [PATCH 089/115] [bazel] Port 6babd63a4bbc094bee4ef8e75f95dccd32325c15
(#139026)
---
utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 +
1 file changed, 1 insertion(+)
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 733a214ed1720..de49b84d0948c 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -2456,6 +2456,7 @@ llvm_target_lib_list = [lib for lib in [
"lib/Target/XCore/XCoreGenDisassemblerTables.inc": ["-gen-disassembler"],
"lib/Target/XCore/XCoreGenInstrInfo.inc": ["-gen-instr-info"],
"lib/Target/XCore/XCoreGenRegisterInfo.inc": ["-gen-register-info"],
+ "lib/Target/XCore/XCoreGenSDNodeInfo.inc": ["-gen-sd-node-info"],
"lib/Target/XCore/XCoreGenSubtargetInfo.inc": ["-gen-subtarget"],
},
},
>From 215dbcb2bc5c1b1bc8775db2a7c22f67f3949fd7 Mon Sep 17 00:00:00 2001
From: Alexey Samsonov <vonosmas at gmail.com>
Date: Wed, 7 May 2025 22:18:05 -0700
Subject: [PATCH 090/115] [bazel] Enable header processing for C++ builds.
(#138934)
This only takes effect for projects that explicitly enable the
"parse_headers" feature (or for builds that enable it on the command
line) - right now there are none, so this change shouldn't affect most
builds. When "parse_headers" is enabled, though, it catches incorrect or
missing includes in header-only cc_libraries.
See https://bazel.build/docs/bazel-and-cpp#toolchain-features on why
this option is a best practice for C++ projects.
---
utils/bazel/.bazelrc | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/utils/bazel/.bazelrc b/utils/bazel/.bazelrc
index a52e21e87ee0f..409c4bd8b633c 100644
--- a/utils/bazel/.bazelrc
+++ b/utils/bazel/.bazelrc
@@ -58,6 +58,11 @@ build --experimental_cc_shared_library
# https://github.com/bazelbuild/bazel/commit/03246077f948f2790a83520e7dccc2625650e6df
build --build_runfile_links=false
+# Enable header processing to verify that header-only libraries are
+# self-contained (for sub-projects that enable "parse_headers" feature).
+# See https://bazel.build/docs/bazel-and-cpp#toolchain-features
+build --process_headers_in_dependencies
+
###############################################################################
# Options to select different strategies for linking potential dependent
# libraries. The default leaves it disabled.
>From ab2e7aa5179ab7ba83fa7f731df63a1adbf7612c Mon Sep 17 00:00:00 2001
From: dyung <douglas.yung at sony.com>
Date: Thu, 8 May 2025 01:38:04 -0400
Subject: [PATCH 091/115] Revert "[Clang] Deprecate
`__is_trivially_relocatable`" (#139027)
Reverts llvm/llvm-project#138835
This is causing a test failure on a bot:
https://lab.llvm.org/buildbot/#/builders/144/builds/24541
---
clang/docs/LanguageExtensions.rst | 8 +-
clang/docs/ReleaseNotes.rst | 9 --
clang/include/clang/Basic/TokenKinds.def | 6 +-
clang/lib/Sema/SemaExprCXX.cpp | 3 -
clang/test/SemaCXX/attr-trivial-abi.cpp | 116 ++++++-------------
clang/test/SemaCXX/ptrauth-triviality.cpp | 35 ++----
clang/test/SemaCXX/type-traits-nonobject.cpp | 16 +--
7 files changed, 53 insertions(+), 140 deletions(-)
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index f56f2a640bb36..ebcad44197ce4 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -1859,18 +1859,12 @@ The following type trait primitives are supported by Clang. Those traits marked
* ``__is_trivially_constructible`` (C++, GNU, Microsoft)
* ``__is_trivially_copyable`` (C++, GNU, Microsoft)
* ``__is_trivially_destructible`` (C++, MSVC 2013)
-* ``__is_trivially_relocatable`` (Clang) (Deprecated,
- use ``__builtin_is_cpp_trivially_relocatable`` instead).
- Returns true if moving an object
+* ``__is_trivially_relocatable`` (Clang): Returns true if moving an object
of the given type, and then destroying the source object, is known to be
functionally equivalent to copying the underlying bytes and then dropping the
source object on the floor. This is true of trivial types,
C++26 relocatable types, and types which
were made trivially relocatable via the ``clang::trivial_abi`` attribute.
- This trait is deprecated and should be replaced by
- ``__builtin_is_cpp_trivially_relocatable``. Note however that it is generally
- unsafe to relocate a C++-relocatable type with ``memcpy`` or ``memmove``;
- use ``__builtin_trivially_relocate``.
* ``__builtin_is_cpp_trivially_relocatable`` (C++): Returns true if an object
is trivially relocatable, as defined by the C++26 standard [meta.unary.prop].
Note that when relocating the caller code should ensure that if the object is polymorphic,
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 0d8c365609c36..e5b463173dcf4 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -579,15 +579,6 @@ Bug Fixes to Compiler Builtins
- ``__has_unique_object_representations(Incomplete[])`` is no longer accepted, per
`LWG4113 <https://cplusplus.github.io/LWG/issue4113>`_.
-- ``__builtin_is_cpp_trivially_relocatable``, ``__builtin_is_replaceable`` and
- ``__builtin_trivially_relocate`` have been added to support standard C++26 relocation.
-
-- ``__is_trivially_relocatable`` has been deprecated, and uses should be replaced by
- ``__builtin_is_cpp_trivially_relocatable``.
- Note that, it is generally unsafe to ``memcpy`` non-trivially copyable types that
- are ``__builtin_is_cpp_trivially_relocatable``. It is recommended to use
- ``__builtin_trivially_relocate`` instead.
-
Bug Fixes to Attribute Support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- Fixed crash when a parameter to the ``clang::annotate`` attribute evaluates to ``void``. See #GH119125
diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def
index 94e72fea56a68..9bc63689d1363 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -544,6 +544,7 @@ TYPE_TRAIT_2(__is_pointer_interconvertible_base_of, IsPointerInterconvertibleBas
#include "clang/Basic/TransformTypeTraits.def"
// Clang-only C++ Type Traits
+TYPE_TRAIT_1(__is_trivially_relocatable, IsTriviallyRelocatable, KEYCXX)
TYPE_TRAIT_1(__is_trivially_equality_comparable, IsTriviallyEqualityComparable, KEYCXX)
TYPE_TRAIT_1(__is_bounded_array, IsBoundedArray, KEYCXX)
TYPE_TRAIT_1(__is_unbounded_array, IsUnboundedArray, KEYCXX)
@@ -555,11 +556,8 @@ TYPE_TRAIT_2(__reference_converts_from_temporary, ReferenceConvertsFromTemporary
// IsDeducible is only used internally by clang for CTAD implementation and
// is not exposed to users.
TYPE_TRAIT_2(/*EmptySpellingName*/, IsDeducible, KEYCXX)
-
-// __is_trivially_relocatable is deprecated
-TYPE_TRAIT_1(__builtin_is_cpp_trivially_relocatable, IsCppTriviallyRelocatable, KEYCXX)
-TYPE_TRAIT_1(__is_trivially_relocatable, IsTriviallyRelocatable, KEYCXX)
TYPE_TRAIT_1(__is_bitwise_cloneable, IsBitwiseCloneable, KEYALL)
+TYPE_TRAIT_1(__builtin_is_cpp_trivially_relocatable, IsCppTriviallyRelocatable, KEYCXX)
TYPE_TRAIT_1(__builtin_is_replaceable, IsReplaceable, KEYCXX)
TYPE_TRAIT_1(__builtin_structured_binding_size, StructuredBindingSize, KEYCXX)
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index b2a982e953012..8bdc2300b0392 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -6449,9 +6449,6 @@ void DiagnoseBuiltinDeprecation(Sema& S, TypeTrait Kind,
case UTT_HasTrivialDestructor:
Replacement = UTT_IsTriviallyDestructible;
break;
- case UTT_IsTriviallyRelocatable:
- Replacement = clang::UTT_IsCppTriviallyRelocatable;
- break;
default:
return;
}
diff --git a/clang/test/SemaCXX/attr-trivial-abi.cpp b/clang/test/SemaCXX/attr-trivial-abi.cpp
index 333ab34bc5d51..e018ccda2d8d9 100644
--- a/clang/test/SemaCXX/attr-trivial-abi.cpp
+++ b/clang/test/SemaCXX/attr-trivial-abi.cpp
@@ -1,6 +1,4 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s -std=c++11
-// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-windows-msvc -std=c++11
-
void __attribute__((trivial_abi)) foo(); // expected-warning {{'trivial_abi' attribute only applies to classes}}
@@ -12,38 +10,30 @@ class __attribute__((trivial_abi)) a { a(a &&); };
// (And it is only trivially relocatable, currently, if it is trivial for calls.)
// In this case, it is suppressed by an explicitly defined move constructor.
// Similar concerns apply to later tests that have #if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(a<int>), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(a<int>), "");
+static_assert(!__is_trivially_relocatable(a<int>), "");
#else
-static_assert(__is_trivially_relocatable(a<int>), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(a<int>), "");
+static_assert(__is_trivially_relocatable(a<int>), "");
#endif
struct [[clang::trivial_abi]] S0 {
int a;
};
-static_assert(__is_trivially_relocatable(S0), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S0), "");
+static_assert(__is_trivially_relocatable(S0), "");
struct __attribute__((trivial_abi)) S1 {
int a;
};
-static_assert(__is_trivially_relocatable(S1), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S1), "");
-
+static_assert(__is_trivially_relocatable(S1), "");
struct __attribute__((trivial_abi)) S3 { // expected-warning {{'trivial_abi' cannot be applied to 'S3'}} expected-note {{is polymorphic}}
virtual void m();
};
-static_assert(!__is_trivially_relocatable(S3), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S3), "");
-
+static_assert(!__is_trivially_relocatable(S3), "");
struct S3_2 {
virtual void m();
} __attribute__((trivial_abi)); // expected-warning {{'trivial_abi' cannot be applied to 'S3_2'}} expected-note {{is polymorphic}}
-static_assert(!__is_trivially_relocatable(S3_2), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S3_2), "");
+static_assert(!__is_trivially_relocatable(S3_2), "");
struct __attribute__((trivial_abi)) S3_3 { // expected-warning {{'trivial_abi' cannot be applied to 'S3_3'}} expected-note {{has a field of a non-trivial class type}}
S3_3(S3_3 &&);
@@ -53,13 +43,9 @@ struct __attribute__((trivial_abi)) S3_3 { // expected-warning {{'trivial_abi' c
// The ClangABI4OrPS4 calling convention kind passes classes in registers if the
// copy constructor is trivial for calls *or deleted*, while other platforms do
// not accept deleted constructors.
-static_assert(__is_trivially_relocatable(S3_3), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S3_3), "");
-
+static_assert(__is_trivially_relocatable(S3_3), "");
#else
-static_assert(!__is_trivially_relocatable(S3_3), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S3_3), "");
-
+static_assert(!__is_trivially_relocatable(S3_3), "");
#endif
// Diagnose invalid trivial_abi even when the type is templated because it has a non-trivial field.
@@ -68,28 +54,20 @@ struct __attribute__((trivial_abi)) S3_4 { // expected-warning {{'trivial_abi' c
S3_4(S3_4 &&);
S3_2 s32;
};
-static_assert(!__is_trivially_relocatable(S3_4<int>), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S3_4<int>), "");
-
+static_assert(!__is_trivially_relocatable(S3_4<int>), "");
struct S4 {
int a;
};
-static_assert(__is_trivially_relocatable(S4), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S4), "");
-
+static_assert(__is_trivially_relocatable(S4), "");
struct __attribute__((trivial_abi)) S5 : public virtual S4 { // expected-warning {{'trivial_abi' cannot be applied to 'S5'}} expected-note {{has a virtual base}}
};
-static_assert(!__is_trivially_relocatable(S5), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S5), "");
-
+static_assert(!__is_trivially_relocatable(S5), "");
struct __attribute__((trivial_abi)) S9 : public S4 {
};
-static_assert(__is_trivially_relocatable(S9), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S9), "");
-
+static_assert(__is_trivially_relocatable(S9), "");
struct __attribute__((trivial_abi(1))) S8 { // expected-error {{'trivial_abi' attribute takes no arguments}}
int a;
@@ -102,12 +80,8 @@ struct __attribute__((trivial_abi)) S10 {
};
S10<int *> p1;
-static_assert(__is_trivially_relocatable(S10<int>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S10<int>), "");
-
-static_assert(__is_trivially_relocatable(S10<S3>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S10<S3>), "");
-
+static_assert(__is_trivially_relocatable(S10<int>), "");
+static_assert(__is_trivially_relocatable(S10<S3>), "");
template <class T>
struct S14 {
@@ -119,21 +93,15 @@ struct __attribute__((trivial_abi)) S15 : S14<T> {
};
S15<int> s15;
-static_assert(__is_trivially_relocatable(S15<int>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S15<int>), "");
-
-static_assert(__is_trivially_relocatable(S15<S3>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S15<S3>), "");
+static_assert(__is_trivially_relocatable(S15<int>), "");
+static_assert(__is_trivially_relocatable(S15<S3>), "");
template <class T>
struct __attribute__((trivial_abi)) S16 {
S14<T> a;
};
-static_assert(__is_trivially_relocatable(S16<int>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S16<int>), "");
-
-static_assert(__is_trivially_relocatable(S16<S3>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S16<S3>), "");
+static_assert(__is_trivially_relocatable(S16<int>), "");
+static_assert(__is_trivially_relocatable(S16<S3>), "");
S16<int> s16;
@@ -142,12 +110,8 @@ struct __attribute__((trivial_abi)) S17 {
};
S17<int> s17;
-static_assert(__is_trivially_relocatable(S17<int>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S17<int>), "");
-
-static_assert(__is_trivially_relocatable(S17<S3>), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S17<S3>), "");
-
+static_assert(__is_trivially_relocatable(S17<int>), "");
+static_assert(__is_trivially_relocatable(S17<S3>), "");
namespace deletedCopyMoveConstructor {
struct __attribute__((trivial_abi)) CopyMoveDeleted { // expected-warning {{'trivial_abi' cannot be applied to 'CopyMoveDeleted'}} expected-note {{copy constructors and move constructors are all deleted}}
@@ -155,24 +119,18 @@ struct __attribute__((trivial_abi)) CopyMoveDeleted { // expected-warning {{'tri
CopyMoveDeleted(CopyMoveDeleted &&) = delete;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(CopyMoveDeleted), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(CopyMoveDeleted), "");
-
+static_assert(__is_trivially_relocatable(CopyMoveDeleted), "");
#else
-static_assert(!__is_trivially_relocatable(CopyMoveDeleted), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(CopyMoveDeleted), "");
-
+static_assert(!__is_trivially_relocatable(CopyMoveDeleted), "");
#endif
struct __attribute__((trivial_abi)) S18 { // expected-warning {{'trivial_abi' cannot be applied to 'S18'}} expected-note {{copy constructors and move constructors are all deleted}}
CopyMoveDeleted a;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(S18), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S18), "");
+static_assert(__is_trivially_relocatable(S18), "");
#else
-static_assert(!__is_trivially_relocatable(S18), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S18), "");
+static_assert(!__is_trivially_relocatable(S18), "");
#endif
struct __attribute__((trivial_abi)) CopyDeleted {
@@ -180,29 +138,25 @@ struct __attribute__((trivial_abi)) CopyDeleted {
CopyDeleted(CopyDeleted &&) = default;
};
#if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(CopyDeleted), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(CopyDeleted), "");
-
+static_assert(!__is_trivially_relocatable(CopyDeleted), "");
#else
-static_assert(__is_trivially_relocatable(CopyDeleted), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(CopyDeleted), "");
+static_assert(__is_trivially_relocatable(CopyDeleted), "");
#endif
struct __attribute__((trivial_abi)) MoveDeleted {
MoveDeleted(const MoveDeleted &) = default;
MoveDeleted(MoveDeleted &&) = delete;
};
-static_assert(__is_trivially_relocatable(MoveDeleted), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(MoveDeleted), "");
+static_assert(__is_trivially_relocatable(MoveDeleted), "");
+
struct __attribute__((trivial_abi)) S19 { // expected-warning {{'trivial_abi' cannot be applied to 'S19'}} expected-note {{copy constructors and move constructors are all deleted}}
CopyDeleted a;
MoveDeleted b;
};
#ifdef __ORBIS__
-static_assert(__is_trivially_relocatable(S19), ""); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S19), "");
-static_assert(!__is_trivially_relocatable(S19), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S19), "");
+static_assert(__is_trivially_relocatable(S19), "");
+#else
+static_assert(!__is_trivially_relocatable(S19), "");
#endif
// This is fine since the move constructor isn't deleted.
@@ -210,12 +164,8 @@ struct __attribute__((trivial_abi)) S20 {
int &&a; // a member of rvalue reference type deletes the copy constructor.
};
#if defined(_WIN64) && !defined(__MINGW32__)
-static_assert(!__is_trivially_relocatable(S20), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S20), "");
-
+static_assert(!__is_trivially_relocatable(S20), "");
#else
-static_assert(__is_trivially_relocatable(S20), ""); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S20), "");
-
+static_assert(__is_trivially_relocatable(S20), "");
#endif
} // namespace deletedCopyMoveConstructor
diff --git a/clang/test/SemaCXX/ptrauth-triviality.cpp b/clang/test/SemaCXX/ptrauth-triviality.cpp
index 785e83aaaa545..ce6e1a7646558 100644
--- a/clang/test/SemaCXX/ptrauth-triviality.cpp
+++ b/clang/test/SemaCXX/ptrauth-triviality.cpp
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++20 -fptrauth-calls -fptrauth-intrinsics -verify -fsyntax-only %s
// RUN: %clang_cc1 -triple aarch64-linux-gnu -std=c++20 -fptrauth-calls -fptrauth-intrinsics -verify -fsyntax-only %s
+// expected-no-diagnostics
#define AQ __ptrauth(1,1,50)
#define IQ __ptrauth(1,0,50)
@@ -23,8 +24,7 @@ static_assert(!__is_trivially_constructible(S1, const S1&));
static_assert(!__is_trivially_assignable(S1, const S1&));
static_assert(__is_trivially_destructible(S1));
static_assert(!__is_trivially_copyable(S1));
-static_assert(!__is_trivially_relocatable(S1)); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(S1));
+static_assert(!__is_trivially_relocatable(S1));
static_assert(!__is_trivially_equality_comparable(S1));
static_assert(__is_trivially_constructible(Holder<S1>));
@@ -32,8 +32,7 @@ static_assert(!__is_trivially_constructible(Holder<S1>, const Holder<S1>&));
static_assert(!__is_trivially_assignable(Holder<S1>, const Holder<S1>&));
static_assert(__is_trivially_destructible(Holder<S1>));
static_assert(!__is_trivially_copyable(Holder<S1>));
-static_assert(!__is_trivially_relocatable(Holder<S1>)); // expected-warning{{deprecated}}
-static_assert(!__builtin_is_cpp_trivially_relocatable(Holder<S1>));
+static_assert(!__is_trivially_relocatable(Holder<S1>));
static_assert(!__is_trivially_equality_comparable(Holder<S1>));
struct S2 {
@@ -46,8 +45,7 @@ static_assert(__is_trivially_constructible(S2, const S2&));
static_assert(__is_trivially_assignable(S2, const S2&));
static_assert(__is_trivially_destructible(S2));
static_assert(__is_trivially_copyable(S2));
-static_assert(__is_trivially_relocatable(S2)); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(S2));
+static_assert(__is_trivially_relocatable(S2));
static_assert(__is_trivially_equality_comparable(S2));
static_assert(__is_trivially_constructible(Holder<S2>));
@@ -55,8 +53,7 @@ static_assert(__is_trivially_constructible(Holder<S2>, const Holder<S2>&));
static_assert(__is_trivially_assignable(Holder<S2>, const Holder<S2>&));
static_assert(__is_trivially_destructible(Holder<S2>));
static_assert(__is_trivially_copyable(Holder<S2>));
-static_assert(__is_trivially_relocatable(Holder<S2>)); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S2>));
+static_assert(__is_trivially_relocatable(Holder<S2>));
static_assert(__is_trivially_equality_comparable(Holder<S2>));
struct AA S3 {
@@ -70,19 +67,15 @@ static_assert(!__is_trivially_constructible(S3, const S3&));
static_assert(!__is_trivially_assignable(S3, const S3&));
static_assert(__is_trivially_destructible(S3));
static_assert(!__is_trivially_copyable(S3));
-static_assert(!__is_trivially_relocatable(S3)); // expected-warning{{deprecated}}
-//FIXME
-static_assert(__builtin_is_cpp_trivially_relocatable(S3));
+static_assert(!__is_trivially_relocatable(S3));
static_assert(!__is_trivially_equality_comparable(S3));
-
static_assert(!__is_trivially_constructible(Holder<S3>));
static_assert(!__is_trivially_constructible(Holder<S3>, const Holder<S3>&));
static_assert(!__is_trivially_assignable(Holder<S3>, const Holder<S3>&));
static_assert(__is_trivially_destructible(Holder<S3>));
static_assert(!__is_trivially_copyable(Holder<S3>));
-static_assert(__is_trivially_relocatable(Holder<S3>)); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S3>));
+static_assert(__is_trivially_relocatable(Holder<S3>));
static_assert(!__is_trivially_equality_comparable(Holder<S3>));
struct IA S4 {
@@ -96,9 +89,7 @@ static_assert(!__is_trivially_constructible(S4, const S4&));
static_assert(!__is_trivially_assignable(S4, const S4&));
static_assert(__is_trivially_destructible(S4));
static_assert(!__is_trivially_copyable(S4));
-static_assert(!__is_trivially_relocatable(S4)); // expected-warning{{deprecated}}
-//FIXME
-static_assert(__builtin_is_cpp_trivially_relocatable(S4));
+static_assert(!__is_trivially_relocatable(S4));
static_assert(!__is_trivially_equality_comparable(S4));
static_assert(!__is_trivially_constructible(Holder<S4>));
@@ -106,8 +97,7 @@ static_assert(!__is_trivially_constructible(Holder<S4>, const Holder<S4>&));
static_assert(!__is_trivially_assignable(Holder<S4>, const Holder<S4>&));
static_assert(__is_trivially_destructible(Holder<S4>));
static_assert(!__is_trivially_copyable(Holder<S4>));
-static_assert(__is_trivially_relocatable(Holder<S4>)); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S4>));
+static_assert(__is_trivially_relocatable(Holder<S4>));
static_assert(!__is_trivially_equality_comparable(Holder<S4>));
struct PA S5 {
@@ -121,9 +111,7 @@ static_assert(!__is_trivially_constructible(S5, const S5&));
static_assert(!__is_trivially_assignable(S5, const S5&));
static_assert(__is_trivially_destructible(S5));
static_assert(!__is_trivially_copyable(S5));
-static_assert(!__is_trivially_relocatable(S5)); // expected-warning{{deprecated}}
-//FIXME
-static_assert(__builtin_is_cpp_trivially_relocatable(S5));
+static_assert(!__is_trivially_relocatable(S5));
static_assert(!__is_trivially_equality_comparable(S5));
static_assert(!__is_trivially_constructible(Holder<S5>));
@@ -131,6 +119,5 @@ static_assert(!__is_trivially_constructible(Holder<S5>, const Holder<S5>&));
static_assert(!__is_trivially_assignable(Holder<S5>, const Holder<S5>&));
static_assert(__is_trivially_destructible(Holder<S5>));
static_assert(!__is_trivially_copyable(Holder<S5>));
-static_assert(__is_trivially_relocatable(Holder<S5>)); // expected-warning{{deprecated}}
-static_assert(__builtin_is_cpp_trivially_relocatable(Holder<S5>));
+static_assert(__is_trivially_relocatable(Holder<S5>));
static_assert(!__is_trivially_equality_comparable(Holder<S5>));
diff --git a/clang/test/SemaCXX/type-traits-nonobject.cpp b/clang/test/SemaCXX/type-traits-nonobject.cpp
index 1763d735547b9..5f7c20cc2e11c 100644
--- a/clang/test/SemaCXX/type-traits-nonobject.cpp
+++ b/clang/test/SemaCXX/type-traits-nonobject.cpp
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s
+// expected-no-diagnostics
+
static_assert(!__is_pod(void), "");
static_assert(!__is_pod(int&), "");
static_assert(!__is_pod(int()), "");
@@ -11,13 +13,7 @@ static_assert(!__is_trivially_copyable(int&), "");
static_assert(!__is_trivially_copyable(int()), "");
static_assert(!__is_trivially_copyable(int()&), "");
-static_assert(!__is_trivially_relocatable(void), ""); // expected-warning{{deprecated}}
-static_assert(!__is_trivially_relocatable(int&), ""); // expected-warning{{deprecated}}
-static_assert(!__is_trivially_relocatable(int()), ""); // expected-warning{{deprecated}}
-static_assert(!__is_trivially_relocatable(int()&), ""); // expected-warning{{deprecated}}
-
-
-static_assert(!__builtin_is_cpp_trivially_relocatable(void), "");
-static_assert(!__builtin_is_cpp_trivially_relocatable(int&), "");
-static_assert(!__builtin_is_cpp_trivially_relocatable(int()), "");
-static_assert(!__builtin_is_cpp_trivially_relocatable(int()&), "");
+static_assert(!__is_trivially_relocatable(void), "");
+static_assert(!__is_trivially_relocatable(int&), "");
+static_assert(!__is_trivially_relocatable(int()), "");
+static_assert(!__is_trivially_relocatable(int()&), "");
>From fa09d031d3f3a5018e626938ef76b7072a298dfd Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Wed, 7 May 2025 22:41:57 -0700
Subject: [PATCH 092/115] [clang-format] Correctly annotate ObjC `*
__autoreleasing *` (#138799)
Fix #138484
---
clang/lib/Format/FormatToken.h | 8 ++++++++
clang/lib/Format/TokenAnnotator.cpp | 3 ++-
clang/unittests/Format/TokenAnnotatorTest.cpp | 6 ++++++
3 files changed, 16 insertions(+), 1 deletion(-)
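For readers skimming the patch, here is a minimal standalone sketch of the check that the new isObjCLifetimeQualifier helper performs, keeping only the string test and omitting the LK_ObjC language-mode guard; the free-function name and the use of std::string_view are illustrative, not part of the patch:

#include <cassert>
#include <string_view>

// Illustrative re-implementation of the check added to FormatToken below:
// a token is treated as an ObjC lifetime qualifier if it starts with "__"
// and the remainder is one of the four ARC ownership qualifiers.
static bool looksLikeObjCLifetimeQualifier(std::string_view TokenText) {
  if (!TokenText.starts_with("__"))
    return false;
  std::string_view Qualifier = TokenText.substr(2);
  return Qualifier == "autoreleasing" || Qualifier == "strong" ||
         Qualifier == "weak" || Qualifier == "unsafe_unretained";
}

int main() {
  assert(looksLikeObjCLifetimeQualifier("__autoreleasing"));
  assert(!looksLikeObjCLifetimeQualifier("autoreleasing")); // no "__" prefix
  assert(!looksLikeObjCLifetimeQualifier("__block"));       // not a lifetime qualifier
  return 0;
}

With that check in place, the second star in "NSError *__autoreleasing *foo;" is annotated as TT_PointerOrReference rather than TT_BinaryOperator, which is what the new unit test below verifies.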
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 946cd7b81587f..b570171d032ac 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -711,6 +711,14 @@ struct FormatToken {
tok::objc_package, tok::objc_private);
}
+ bool isObjCLifetimeQualifier(const FormatStyle &Style) const {
+ if (Style.Language != FormatStyle::LK_ObjC || !TokenText.starts_with("__"))
+ return false;
+ const auto Qualifier = TokenText.substr(2);
+ return Qualifier == "autoreleasing" || Qualifier == "strong" ||
+ Qualifier == "weak" || Qualifier == "unsafe_unretained";
+ }
+
/// Returns whether \p Tok is ([{ or an opening < of a template or in
/// protos.
bool opensScope() const {
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index ac6551b2bd1ad..f0f9207564ab1 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3100,7 +3100,8 @@ class AnnotatingParser {
if (NextNextToken) {
if (NextNextToken->is(tok::arrow))
return TT_BinaryOperator;
- if (NextNextToken->isPointerOrReference()) {
+ if (NextNextToken->isPointerOrReference() &&
+ !NextToken->isObjCLifetimeQualifier(Style)) {
NextNextToken->setFinalizedType(TT_BinaryOperator);
return TT_BinaryOperator;
}
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index bcb2b6f33d1ad..7982ccb167b53 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -411,6 +411,12 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) {
ASSERT_EQ(Tokens.size(), 27u) << Tokens;
EXPECT_TOKEN(Tokens[16], tok::star, TT_BinaryOperator);
EXPECT_TOKEN(Tokens[22], tok::star, TT_BinaryOperator);
+
+ Tokens = annotate("NSError *__autoreleasing *foo;",
+ getLLVMStyle(FormatStyle::LK_ObjC));
+ ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+ EXPECT_TOKEN(Tokens[1], tok::star, TT_PointerOrReference);
+ EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference);
}
TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) {
>From 26572bad95f816a979ce70b4e1335c8438a96df2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 May 2025 07:46:04 +0200
Subject: [PATCH 093/115] clang/OpenCL: Add baseline test showing broken
codegen (#138862)
---
.../CodeGenCXX/amdgcn-automatic-variable.cpp | 24 +++++++
.../amdgcn-automatic-variable.cl | 66 +++++++++++++++++++
2 files changed, 90 insertions(+)
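The pattern these baseline tests pin down is a pointer-size mismatch: on AMDGCN an addrspace(5) private pointer is 4 bytes while a generic pointer is 8 bytes (visible in the align 4 vs align 8 allocas in the CHECK lines below), so storing a generic pointer into a slot sized for a private pointer truncates or clobbers the value. A rough host-side C++ sketch of that mismatch, with 32-/64-bit integer widths standing in for the target pointer sizes (an assumption made purely for illustration):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Model: a 4-byte slot allocated as if for a private (addrspace(5)) pointer...
  uint32_t private_ptr_slot = 0;
  // ...receiving an 8-byte generic pointer value.
  uint64_t generic_ptr_value = 0x1122334455667788ULL;

  // Only part of the pointer fits; in the real codegen the full 8-byte store
  // into the 4-byte slot is the undefined behavior the tests expose. Here we
  // copy only what fits so the sketch itself stays well-defined.
  std::memcpy(&private_ptr_slot, &generic_ptr_value, sizeof(private_ptr_slot));

  // The reloaded "pointer" has lost half of its bits.
  assert(private_ptr_slot != generic_ptr_value);
  return 0;
}

The follow-up patch later in this series removes the mismatch by allocating the slot for the address space the pointer is actually used in.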
diff --git a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
index c1f9310141000..3c2a624bd4f95 100644
--- a/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-automatic-variable.cpp
@@ -133,3 +133,27 @@ void func7() {
use(&x);
}
+#define __private __attribute__((opencl_private))
+
+// CHECK-LABEL: @_Z34explicit_private_address_space_ptrv(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[ALLOCA_ADDR_AS_PRIVATE:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CHECK-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR]] to ptr
+// CHECK-NEXT: [[ALLOCA_ADDR_AS_PRIVATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR_AS_PRIVATE]] to ptr
+// CHECK-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[VAR_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[VAR_ASCAST]] to ptr addrspace(5)
+// CHECK-NEXT: store ptr addrspace(5) [[VAR_ASCAST_ASCAST]], ptr [[ALLOCA_ADDR_AS_PRIVATE_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr [[ALLOCA_ADDR_AS_PRIVATE_ASCAST]], align 4
+// CHECK-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8
+// CHECK-NEXT: ret void
+//
+void explicit_private_address_space_ptr() {
+ long var;
+ long *alloca_addr = &var;
+
+ __private long *alloca_addr_as_private = (__private long *)&var;
+ *alloca_addr_as_private = 8;
+}
diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index dba6519966eb5..e28120adc0364 100644
--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -109,3 +109,69 @@ void func2(void) {
void func3(void) {
float a[16][1] = {{0.}};
}
+
+// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca(
+// CL12-SAME: ) #[[ATTR0]] {
+// CL12-NEXT: [[ENTRY:.*:]]
+// CL12-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
+// CL12-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CL12-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
+// CL12-NEXT: store ptr addrspace(5) [[VAR]], ptr addrspace(5) [[ALLOCA_ADDR]], align 4
+// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOCA_ADDR]], align 4
+// CL12-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8
+// CL12-NEXT: ret void
+//
+// CL20-LABEL: define dso_local void @wrong_store_type_private_pointer_alloca(
+// CL20-SAME: ) #[[ATTR0]] {
+// CL20-NEXT: [[ENTRY:.*:]]
+// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
+// CL20-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
+// CL20-NEXT: [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR]] to ptr
+// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
+// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 4
+// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr [[ALLOCA_ADDR_ASCAST]], align 4
+// CL20-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8
+// CL20-NEXT: ret void
+//
+void wrong_store_type_private_pointer_alloca() {
+ long var = 5;
+
+ // This needs to write an addrspace(5) pointer to the temporary alloca,
+ // which should be allocated with the correct size.
+ __private long *alloca_addr = &var;
+ *alloca_addr = 8;
+}
+
+// CL12-LABEL: define dso_local void @wrong_store_type_private_pointer_as_generic_alloca(
+// CL12-SAME: ) #[[ATTR0]] {
+// CL12-NEXT: [[ENTRY:.*:]]
+// CL12-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
+// CL12-NEXT: [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// CL12-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
+// CL12-NEXT: store ptr addrspace(5) [[VAR]], ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 4
+// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 4
+// CL12-NEXT: store i64 9, ptr addrspace(5) [[TMP0]], align 8
+// CL12-NEXT: ret void
+//
+// CL20-LABEL: define dso_local void @wrong_store_type_private_pointer_as_generic_alloca(
+// CL20-SAME: ) #[[ATTR0]] {
+// CL20-NEXT: [[ENTRY:.*:]]
+// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
+// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr, align 8, addrspace(5)
+// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
+// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]] to ptr
+// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
+// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
+// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
+// CL20-NEXT: store i64 9, ptr [[TMP0]], align 8
+// CL20-NEXT: ret void
+//
+void wrong_store_type_private_pointer_as_generic_alloca() {
+ long var = 5;
+
+ // This needs to write an addrspace(0) pointer to the temporary alloca in
+ // CL2.0, which should be allocated with the correct size.
+ long *alloca_addr_as_generic = &var;
+ *alloca_addr_as_generic = 9;
+}
>From 5df01abe191ff4f848566e239798a2b4d26e1cf4 Mon Sep 17 00:00:00 2001
From: Filip Milosevic <54005272+MightyFilipns at users.noreply.github.com>
Date: Thu, 8 May 2025 07:47:56 +0200
Subject: [PATCH 094/115] [clang-format] Add SpaceAfterOperatorKeyword option
(#137610)
Add SpaceAfterOperatorKeyword option to clang-format
---
clang/docs/ClangFormatStyleOptions.rst | 10 ++++++++++
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Format/Format.h | 9 +++++++++
clang/lib/Format/Format.cpp | 3 +++
clang/lib/Format/TokenAnnotator.cpp | 2 +-
clang/unittests/Format/ConfigParseTest.cpp | 1 +
clang/unittests/Format/FormatTest.cpp | 6 ++++++
7 files changed, 31 insertions(+), 1 deletion(-)
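As a quick illustration of the new knob (the struct names below are arbitrary; the snippet mirrors the documentation example added further down):

// With SpaceAfterOperatorKeyword: false (the LLVM-style default set in this
// patch), clang-format removes any space after the `operator` keyword:
struct Fixed {
  bool operator==(const Fixed &) const;
  Fixed &operator++();
};

// With SpaceAfterOperatorKeyword: true, the space after `operator` is kept:
struct Spaced {
  bool operator ==(const Spaced &) const;
  Spaced &operator ++();
};

Because the option defaults to false in getLLVMStyle, existing configurations keep their current formatting unless they opt in.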
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index b47291599649d..a4c381bf583b6 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -6127,6 +6127,16 @@ the configuration (without a prefix: ``Auto``).
true: false:
! someExpression(); vs. !someExpression();
+.. _SpaceAfterOperatorKeyword:
+
+**SpaceAfterOperatorKeyword** (``Boolean``) :versionbadge:`clang-format 21` :ref:`¶ <SpaceAfterOperatorKeyword>`
+ If ``true``, a space will be inserted after the ``operator`` keyword.
+
+ .. code-block:: c++
+
+ true: false:
+ bool operator ==(int a); vs. bool operator==(int a);
+
.. _SpaceAfterTemplateKeyword:
**SpaceAfterTemplateKeyword** (``Boolean``) :versionbadge:`clang-format 4` :ref:`¶ <SpaceAfterTemplateKeyword>`
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e5b463173dcf4..c52e285bde627 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -810,6 +810,7 @@ clang-format
- Add ``EnumTrailingComma`` option for inserting/removing commas at the end of
``enum`` enumerator lists.
- Add ``OneLineFormatOffRegex`` option for turning formatting off for one line.
+- Add ``SpaceAfterOperatorKeyword`` option.
libclang
--------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 7fe41d800ccb3..b86c4bd00eb91 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4484,6 +4484,14 @@ struct FormatStyle {
/// \version 9
bool SpaceAfterLogicalNot;
+ /// If ``true``, a space will be inserted after the ``operator`` keyword.
+ /// \code
+ /// true: false:
+ /// bool operator ==(int a); vs. bool operator==(int a);
+ /// \endcode
+ /// \version 21
+ bool SpaceAfterOperatorKeyword;
+
/// If \c true, a space will be inserted after the ``template`` keyword.
/// \code
/// true: false:
@@ -5454,6 +5462,7 @@ struct FormatStyle {
SortJavaStaticImport == R.SortJavaStaticImport &&
SpaceAfterCStyleCast == R.SpaceAfterCStyleCast &&
SpaceAfterLogicalNot == R.SpaceAfterLogicalNot &&
+ SpaceAfterOperatorKeyword == R.SpaceAfterOperatorKeyword &&
SpaceAfterTemplateKeyword == R.SpaceAfterTemplateKeyword &&
SpaceBeforeAssignmentOperators == R.SpaceBeforeAssignmentOperators &&
SpaceBeforeCaseColon == R.SpaceBeforeCaseColon &&
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 2f4b64ef4f5fe..20b5352b83a9e 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1152,6 +1152,8 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations);
IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
IO.mapOptional("SpaceAfterLogicalNot", Style.SpaceAfterLogicalNot);
+ IO.mapOptional("SpaceAfterOperatorKeyword",
+ Style.SpaceAfterOperatorKeyword);
IO.mapOptional("SpaceAfterTemplateKeyword",
Style.SpaceAfterTemplateKeyword);
IO.mapOptional("SpaceAroundPointerQualifiers",
@@ -1639,6 +1641,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
LLVMStyle.SortUsingDeclarations = FormatStyle::SUD_LexicographicNumeric;
LLVMStyle.SpaceAfterCStyleCast = false;
LLVMStyle.SpaceAfterLogicalNot = false;
+ LLVMStyle.SpaceAfterOperatorKeyword = false;
LLVMStyle.SpaceAfterTemplateKeyword = true;
LLVMStyle.SpaceAroundPointerQualifiers = FormatStyle::SAPQ_Default;
LLVMStyle.SpaceBeforeAssignmentOperators = true;
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index f0f9207564ab1..542c362ccacae 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5033,7 +5033,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
}
if (Left.is(tok::kw_operator))
- return Right.is(tok::coloncolon);
+ return Right.is(tok::coloncolon) || Style.SpaceAfterOperatorKeyword;
if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
!Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
return true;
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index f7ab5546c7193..bd27a9f60ffcc 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -204,6 +204,7 @@ TEST(ConfigParseTest, ParsesConfigurationBools) {
CHECK_PARSE_BOOL(SpacesInContainerLiterals);
CHECK_PARSE_BOOL(SpaceAfterCStyleCast);
CHECK_PARSE_BOOL(SpaceAfterTemplateKeyword);
+ CHECK_PARSE_BOOL(SpaceAfterOperatorKeyword);
CHECK_PARSE_BOOL(SpaceAfterLogicalNot);
CHECK_PARSE_BOOL(SpaceBeforeAssignmentOperators);
CHECK_PARSE_BOOL(SpaceBeforeCaseColon);
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index f85e078696730..436beaf68bd2a 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -17291,6 +17291,12 @@ TEST_F(FormatTest, CalculatesOriginalColumn) {
" comment */");
}
+TEST_F(FormatTest, SpaceAfterOperatorKeyword) {
+ auto SpaceAfterOperatorKeyword = getLLVMStyle();
+ SpaceAfterOperatorKeyword.SpaceAfterOperatorKeyword = true;
+ verifyFormat("bool operator ++(int a);", SpaceAfterOperatorKeyword);
+}
+
TEST_F(FormatTest, ConfigurableSpaceBeforeParens) {
FormatStyle NoSpace = getLLVMStyle();
NoSpace.SpaceBeforeParens = FormatStyle::SBPO_Never;
>From a11d86461e7d7d9bce3d04a39ded1cad394239ca Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 May 2025 07:51:57 +0200
Subject: [PATCH 095/115] clang: Fix broken implicit cast to generic address
space (#138863)
This fixes emitting undefined behavior where a 64-bit generic
pointer is written to a 32-bit slot allocated for a private pointer.
This can be seen in test/CodeGenOpenCL/amdgcn-automatic-variable.cl's
wrong_pointer_alloca.
---
clang/lib/CodeGen/CGDecl.cpp | 3 +-
clang/lib/CodeGen/CGExpr.cpp | 17 ++--
clang/lib/CodeGen/CodeGenFunction.h | 20 ++++-
.../CodeGenOpenCL/addr-space-struct-arg.cl | 5 +-
.../amdgcn-automatic-variable.cl | 45 ++++------
.../amdgpu-abi-struct-arg-byref.cl | 5 +-
.../CodeGenOpenCL/amdgpu-enqueue-kernel.cl | 90 ++++++++-----------
clang/test/CodeGenOpenCL/amdgpu-nullptr.cl | 28 +++---
clang/test/CodeGenOpenCL/blocks.cl | 6 +-
clang/test/CodeGenOpenCL/builtins-alloca.cl | 48 ++++------
.../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 10 +--
...plicit-addrspacecast-function-parameter.cl | 7 +-
clang/test/Index/pipe-size.cl | 2 +-
13 files changed, 129 insertions(+), 157 deletions(-)
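The heart of the fix is in CreateTempAlloca: rather than unconditionally casting every temporary alloca to the generic (default) address space, the caller now passes the language address space the value will be used in, and an addrspacecast is emitted only when that differs from the alloca's own address space. A simplified sketch of that decision, with stand-in types replacing the CodeGen classes (the enum, helper, and hard-coded address-space numbers are illustrative assumptions, not Clang's API):

#include <cstdio>

// Stand-ins for clang::LangAS and the target address-space mapping.
enum class LangAS { Default, opencl_private };

// On AMDGCN, Default maps to addrspace(0) and opencl_private to addrspace(5);
// hard-coded here purely for illustration.
static unsigned targetAddressSpace(LangAS AS) {
  return AS == LangAS::opencl_private ? 5u : 0u;
}

struct FakeAlloca {
  unsigned AddrSpace; // address space the alloca itself lives in (5 on AMDGCN)
};

// Sketch of the patched logic: cast only when the destination address space
// differs from the alloca's address space, instead of always casting to Default.
static void createTempAlloca(LangAS DestLangAS) {
  FakeAlloca Alloca{5};
  unsigned DestAddrSpace = targetAddressSpace(DestLangAS);
  if (DestAddrSpace != Alloca.AddrSpace)
    std::printf("emit addrspacecast p5 -> p%u\n", DestAddrSpace);
  else
    std::printf("use the alloca directly in addrspace(%u)\n", Alloca.AddrSpace);
}

int main() {
  createTempAlloca(LangAS::Default);        // generic use: cast is still emitted
  createTempAlloca(LangAS::opencl_private); // __private use: no cast, no oversized slot
  return 0;
}

For a __private pointer on AMDGCN both sides are addrspace(5), so no cast and no 8-byte generic slot are produced, which is exactly what the updated CHECK lines below expect.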
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index db34e2738b4cf..1e54e55c5abbb 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1588,7 +1588,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// Create the alloca. Note that we set the name separately from
// building the instruction so that it's there even in no-asserts
// builds.
- address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName(),
+ address = CreateTempAlloca(allocaTy, Ty.getAddressSpace(),
+ allocaAlignment, D.getName(),
/*ArraySize=*/nullptr, &AllocaAddr);
// Don't emit lifetime markers for MSVC catch parameters. The lifetime of
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 6f5ead78f2b23..1a835c97decef 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -100,13 +100,11 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
return RawAddress(Alloca, Ty, Align, KnownNonNull);
}
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block. The alloca is casted to default address space if necessary.
-RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
- const Twine &Name,
+RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
+ CharUnits Align, const Twine &Name,
llvm::Value *ArraySize,
RawAddress *AllocaAddr) {
- auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+ RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
if (AllocaAddr)
*AllocaAddr = Alloca;
llvm::Value *V = Alloca.getPointer();
@@ -114,8 +112,9 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
// be different from the type defined by the language. For example,
// in C++ the auto variables are in the default address space. Therefore
// cast alloca to the default address space when necessary.
- if (getASTAllocaAddressSpace() != LangAS::Default) {
- auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
+
+ unsigned DestAddrSpace = getContext().getTargetAddressSpace(DestLangAS);
+ if (DestAddrSpace != Alloca.getAddressSpace()) {
llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
// When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
// otherwise alloca is inserted at the current insertion point of the
@@ -123,8 +122,8 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
if (!ArraySize)
Builder.SetInsertPoint(getPostAllocaInsertPoint());
V = getTargetHooks().performAddrSpaceCast(
- *this, V, getASTAllocaAddressSpace(), LangAS::Default,
- Builder.getPtrTy(DestAddrSpace), /*non-null*/ true);
+ *this, V, getASTAllocaAddressSpace(), DestLangAS,
+ Builder.getPtrTy(DestAddrSpace), /*IsNonNull=*/true);
}
return RawAddress(V, Ty, Align, KnownNonNull);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 561f8f6a2a2fb..c0bc3825f0188 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2861,10 +2861,28 @@ class CodeGenFunction : public CodeGenTypeCache {
/// more efficient if the caller knows that the address will not be exposed.
llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr);
+
+ /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+ /// block. The alloca is cast to the address space of \p UseAddrSpace if
+ /// necessary.
+ RawAddress CreateTempAlloca(llvm::Type *Ty, LangAS UseAddrSpace,
+ CharUnits align, const Twine &Name = "tmp",
+ llvm::Value *ArraySize = nullptr,
+ RawAddress *Alloca = nullptr);
+
+ /// CreateTempAlloca - This creates an alloca and inserts it into the entry
+ /// block. The alloca is cast to the default address space if necessary.
+ ///
+ /// FIXME: This version should be removed, and the context should provide the
+ /// address space of the use instead of the default.
RawAddress CreateTempAlloca(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr,
- RawAddress *Alloca = nullptr);
+ RawAddress *Alloca = nullptr) {
+ return CreateTempAlloca(Ty, LangAS::Default, align, Name, ArraySize,
+ Alloca);
+ }
+
RawAddress CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits align,
const Twine &Name = "tmp",
llvm::Value *ArraySize = nullptr);
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 49604c6c5e61b..a70e9af75fa38 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -798,10 +798,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN20-SAME: ) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
-// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
-// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
-// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
+// AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
// AMDGCN20-NEXT: ret void
//
//
diff --git a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index e28120adc0364..af50928d8ecf0 100644
--- a/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -55,22 +55,19 @@ void func1(int *x) {
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
+// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
+// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
+// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
+// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
-// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
-// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
-// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
-// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
-// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
-// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
-// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
-// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
-// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
-// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
-// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
-// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
-// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
-// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
-// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
+// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
+// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
+// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
+// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
+// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
+// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
+// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
+// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
// CL20-NEXT: ret void
//
void func2(void) {
@@ -102,8 +99,7 @@ void func2(void) {
// CL20-SAME: ) #[[ATTR0]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
-// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
+// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
// CL20-NEXT: ret void
//
void func3(void) {
@@ -126,11 +122,9 @@ void func3(void) {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
// CL20-NEXT: [[ALLOCA_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
-// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
-// CL20-NEXT: [[ALLOCA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR]] to ptr
-// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
-// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_ASCAST]], align 4
-// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr [[ALLOCA_ADDR_ASCAST]], align 4
+// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
+// CL20-NEXT: store ptr addrspace(5) [[VAR]], ptr addrspace(5) [[ALLOCA_ADDR]], align 4
+// CL20-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOCA_ADDR]], align 4
// CL20-NEXT: store i64 8, ptr addrspace(5) [[TMP0]], align 8
// CL20-NEXT: ret void
//
@@ -159,11 +153,10 @@ void wrong_store_type_private_pointer_alloca() {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[VAR:%.*]] = alloca i64, align 8, addrspace(5)
// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC:%.*]] = alloca ptr, align 8, addrspace(5)
+// CL20-NEXT: store i64 5, ptr addrspace(5) [[VAR]], align 8
// CL20-NEXT: [[VAR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAR]] to ptr
-// CL20-NEXT: [[ALLOCA_ADDR_AS_GENERIC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]] to ptr
-// CL20-NEXT: store i64 5, ptr [[VAR_ASCAST]], align 8
-// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
-// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ALLOCA_ADDR_AS_GENERIC_ASCAST]], align 8
+// CL20-NEXT: store ptr [[VAR_ASCAST]], ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8
+// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[ALLOCA_ADDR_AS_GENERIC]], align 8
// CL20-NEXT: store i64 9, ptr [[TMP0]], align 8
// CL20-NEXT: ret void
//
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index 7d0a66bac1469..a1a114ef129a1 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -272,10 +272,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN-SAME: ) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
-// AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
-// AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
-// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR4]]
+// AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR4]]
// AMDGCN-NEXT: ret void
//
//
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index a0e11a1b5997e..bbb55b7e14941 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -136,9 +136,6 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// NOCPU-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// NOCPU-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
-// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
-// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// NOCPU-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
// NOCPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
@@ -146,17 +143,16 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
// NOCPU-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
// NOCPU-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
-// NOCPU-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
// NOCPU-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
// NOCPU-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
// NOCPU-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
// NOCPU-NEXT: store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1
// NOCPU-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
// NOCPU-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
-// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
// NOCPU-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
// NOCPU-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
// NOCPU-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
@@ -170,9 +166,9 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1
// NOCPU-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8
// NOCPU-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]])
-// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
// NOCPU-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
// NOCPU-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
@@ -192,9 +188,9 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8
// NOCPU-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8
// NOCPU-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]])
-// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
// NOCPU-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
// NOCPU-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
// NOCPU-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
@@ -228,11 +224,11 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
// NOCPU-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8
// NOCPU-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8
-// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
-// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8
+// NOCPU-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8
+// NOCPU-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
+// NOCPU-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8
// NOCPU-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]])
// NOCPU-NEXT: ret void
//
@@ -259,16 +255,13 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// NOCPU-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// NOCPU-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
-// NOCPU-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// NOCPU-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
-// NOCPU-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// NOCPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// NOCPU-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8
-// NOCPU-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4
+// NOCPU-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4
// NOCPU-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8
-// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4
-// NOCPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false)
+// NOCPU-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8
+// NOCPU-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4
+// NOCPU-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false)
// NOCPU-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// NOCPU-NEXT: ret void
//
@@ -517,9 +510,6 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
// GFX900-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
// GFX900-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
-// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
-// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT: [[BLOCK_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK]] to ptr
// GFX900-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP2]] to ptr
@@ -527,7 +517,6 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP11]] to ptr
// GFX900-NEXT: [[BLOCK12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK12]] to ptr
// GFX900-NEXT: [[BLOCK_SIZES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK_SIZES]] to ptr
-// GFX900-NEXT: [[BLOCK20_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK20]] to ptr
// GFX900-NEXT: [[BLOCK21_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BLOCK21]] to ptr
// GFX900-NEXT: [[TMP27_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VARTMP27]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8, !tbaa [[TBAA14]]
@@ -536,11 +525,11 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9:[0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17:![0-9]+]]
+// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17:![0-9]+]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
-// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19:![0-9]+]]
-// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
-// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
+// GFX900-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19:![0-9]+]]
+// GFX900-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21:![0-9]+]]
// GFX900-NEXT: [[BLOCK_SIZE:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 25, ptr [[BLOCK_SIZE]], align 8
// GFX900-NEXT: [[BLOCK_ALIGN:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), i8 }>, ptr [[BLOCK_ASCAST]], i32 0, i32 1
@@ -554,9 +543,9 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP3:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, !tbaa [[TBAA16]]
// GFX900-NEXT: store i8 [[TMP3]], ptr [[BLOCK_CAPTURED1]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT: [[TMP4:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP0]], i32 [[TMP1]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr [[BLOCK_ASCAST]])
-// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
-// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP2_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[BLOCK_SIZE4:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE4]], align 8
// GFX900-NEXT: [[BLOCK_ALIGN5:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK3_ASCAST]], i32 0, i32 1
@@ -576,9 +565,9 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[TMP10:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: store i64 [[TMP10]], ptr [[BLOCK_CAPTURED10]], align 8, !tbaa [[TBAA3]]
// GFX900-NEXT: [[TMP11:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP5]], i32 [[TMP6]], ptr addrspace(5) [[VARTMP2]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr [[BLOCK3_ASCAST]])
-// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
-// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP11_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[BLOCK_SIZE13:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 0
// GFX900-NEXT: store i32 41, ptr [[BLOCK_SIZE13]], align 8
// GFX900-NEXT: [[BLOCK_ALIGN14:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 }>, ptr [[BLOCK12_ASCAST]], i32 0, i32 1
@@ -615,11 +604,11 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[BLOCK_CAPTURED26:%.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i64, ptr addrspace(1) }>, ptr [[BLOCK21_ASCAST]], i32 0, i32 4
// GFX900-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !tbaa [[TBAA7]]
// GFX900-NEXT: store ptr addrspace(1) [[TMP21]], ptr [[BLOCK_CAPTURED26]], align 8, !tbaa [[TBAA7]]
-// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
-// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
-// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP27_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
-// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr [[BLOCK20_ASCAST]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: store ptr [[BLOCK21_ASCAST]], ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]]
+// GFX900-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP27_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT: [[TMP24:%.*]] = load ptr, ptr addrspace(5) [[BLOCK20]], align 8, !tbaa [[TBAA16]]
// GFX900-NEXT: [[TMP25:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP22]], i32 [[TMP23]], ptr addrspace(5) [[VARTMP27]], ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr [[BLOCK21_ASCAST]])
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[BLOCK20]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
@@ -650,19 +639,16 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900-NEXT: [[NDRANGE:%.*]] = alloca [[STRUCT_NDRANGE_T:%.*]], align 4, addrspace(5)
// GFX900-NEXT: [[TMP:%.*]] = alloca [[STRUCT_NDRANGE_T]], align 4, addrspace(5)
// GFX900-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
-// GFX900-NEXT: [[DEFAULT_QUEUE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEFAULT_QUEUE]] to ptr
-// GFX900-NEXT: [[FLAGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FLAGS]] to ptr
-// GFX900-NEXT: [[NDRANGE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[NDRANGE]] to ptr
// GFX900-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
// GFX900-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR_ASCAST]], align 8, !tbaa [[TBAA26]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[DEFAULT_QUEUE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
-// GFX900-NEXT: store i32 0, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: store i32 0, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
// GFX900-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.memtime()
-// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[DEFAULT_QUEUE_ASCAST]], align 8, !tbaa [[TBAA19]]
-// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr [[FLAGS_ASCAST]], align 4, !tbaa [[TBAA17]]
-// GFX900-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_ASCAST]], ptr align 4 [[NDRANGE_ASCAST]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
+// GFX900-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[DEFAULT_QUEUE]], align 8, !tbaa [[TBAA19]]
+// GFX900-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[FLAGS]], align 4, !tbaa [[TBAA17]]
+// GFX900-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[TMP_ASCAST]], ptr addrspace(5) align 4 [[NDRANGE]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT21]]
// GFX900-NEXT: [[TMP3:%.*]] = call i32 @__enqueue_kernel_basic(ptr addrspace(1) [[TMP1]], i32 [[TMP2]], ptr addrspace(5) [[TMP]], ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr), ptr addrspacecast (ptr addrspace(1) @__block_literal_global to ptr))
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[NDRANGE]]) #[[ATTR9]]
// GFX900-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[FLAGS]]) #[[ATTR9]]
diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
index a0c106bca83c9..d0bcd1fccb7ce 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -139,12 +139,12 @@ void test_static_var_local(void) {
// Test function-scope variable initialization.
// NOOPT-LABEL: @test_func_scope_var_private(
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) null, ptr %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp1{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp2{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// NOOPT: store ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)), ptr addrspace(5) %sp4{{.*}}, align 4
+// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_private.SS1, i64 32, i1 false)
+// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
void test_func_scope_var_private(void) {
private char *sp1 = 0;
private char *sp2 = NULL;
@@ -157,12 +157,12 @@ void test_func_scope_var_private(void) {
// Test function-scope variable initialization.
// NOOPT-LABEL: @test_func_scope_var_local(
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp1{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp2{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) null, ptr %sp3{{.*}}, align 4
-// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr %sp4{{.*}}, align 4
-// NOOPT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false)
-// NOOPT: call void @llvm.memset.p0.i64(ptr align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp1{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp2{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) null, ptr addrspace(5) %sp3{{.*}}, align 4
+// NOOPT: store ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), ptr addrspace(5) %sp4{{.*}}, align 4
+// NOOPT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 8 %SS1{{.*}}, ptr addrspace(4) align 8 @__const.test_func_scope_var_local.SS1, i64 32, i1 false)
+// NOOPT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 8 %SS2{{.*}}, i8 0, i64 24, i1 false)
void test_func_scope_var_local(void) {
local char *sp1 = 0;
local char *sp2 = NULL;
@@ -603,7 +603,7 @@ int test_and_ptr(private char* p1, local char* p2) {
// Test folding of null pointer in function scope.
// NOOPT-LABEL: test_fold_private
// NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8
+// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
// NOOPT: call void @test_fold_callee
// NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(5) addrspacecast (ptr null to ptr addrspace(5)) to i32) to i64
@@ -619,7 +619,7 @@ void test_fold_private(void) {
// NOOPT-LABEL: test_fold_local
// NOOPT: call void @test_fold_callee
-// NOOPT: store ptr addrspace(1) null, ptr %glob{{.*}}, align 8
+// NOOPT: store ptr addrspace(1) null, ptr addrspace(5) %glob{{.*}}, align 8
// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
// NOOPT: call void @test_fold_callee
// NOOPT: %[[SEXT:.*]] = sext i32 ptrtoint (ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)) to i32) to i64
diff --git a/clang/test/CodeGenOpenCL/blocks.cl b/clang/test/CodeGenOpenCL/blocks.cl
index 161f1406c96cb..20212cf213ffe 100644
--- a/clang/test/CodeGenOpenCL/blocks.cl
+++ b/clang/test/CodeGenOpenCL/blocks.cl
@@ -44,10 +44,10 @@ void foo(){
// AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block:.*]], i32 0, i32 2
// AMDGCN: store ptr @__foo_block_invoke, ptr %[[block_invoke]]
// AMDGCN: %[[block_captured:.*]] = getelementptr inbounds nuw <{ i32, i32, ptr, i32 }>, ptr %[[block]], i32 0, i32 3
- // AMDGCN: %[[i_value:.*]] = load i32, ptr %i
+ // AMDGCN: %[[i_value:.*]] = load i32, ptr addrspace(5) %i
// AMDGCN: store i32 %[[i_value]], ptr %[[block_captured]],
- // AMDGCN: store ptr %[[block]], ptr %[[block_B:.*]],
- // AMDGCN: %[[block_literal:.*]] = load ptr, ptr %[[block_B]]
+ // AMDGCN: store ptr %[[block]], ptr addrspace(5) %[[block_B:.*]],
+ // AMDGCN: %[[block_literal:.*]] = load ptr, ptr addrspace(5) %[[block_B]]
// AMDGCN: call {{.*}}i32 @__foo_block_invoke(ptr noundef %[[block_literal]])
int (^ block_B)(void) = ^{
diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl
index 85b449e45a0f1..ce7da3aba9e45 100644
--- a/clang/test/CodeGenOpenCL/builtins-alloca.cl
+++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl
@@ -39,13 +39,12 @@
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca(
@@ -67,13 +66,12 @@
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test1_builtin_alloca(unsigned n) {
@@ -111,13 +109,12 @@ void test1_builtin_alloca(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_uninitialized(
@@ -139,13 +136,12 @@ void test1_builtin_alloca(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test1_builtin_alloca_uninitialized(unsigned n) {
@@ -183,13 +179,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align(
@@ -211,13 +206,12 @@ void test1_builtin_alloca_uninitialized(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test1_builtin_alloca_with_align(unsigned n) {
@@ -255,13 +249,12 @@ void test1_builtin_alloca_with_align(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized(
@@ -283,13 +276,12 @@ void test1_builtin_alloca_with_align(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
@@ -325,12 +317,11 @@ void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca(
@@ -351,12 +342,11 @@ void test1_builtin_alloca_with_align_uninitialized(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test2_builtin_alloca(unsigned n) {
@@ -392,12 +382,11 @@ void test2_builtin_alloca(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_uninitialized(
@@ -418,12 +407,11 @@ void test2_builtin_alloca(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test2_builtin_alloca_uninitialized(unsigned n) {
@@ -459,12 +447,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align(
@@ -485,12 +472,11 @@ void test2_builtin_alloca_uninitialized(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test2_builtin_alloca_with_align(unsigned n) {
@@ -526,12 +512,11 @@ void test2_builtin_alloca_with_align(unsigned n) {
// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL20-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
// OPENCL20-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
-// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL20-NEXT: ret void
//
// OPENCL30-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized(
@@ -552,12 +537,11 @@ void test2_builtin_alloca_with_align(unsigned n) {
// OPENCL30GAS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// OPENCL30GAS-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
-// OPENCL30GAS-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]] to ptr
// OPENCL30GAS-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// OPENCL30GAS-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64
// OPENCL30GAS-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5)
-// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr [[ALLOC_PTR_ALIGN_UNINITIALIZED_ASCAST]], align 4
+// OPENCL30GAS-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4
// OPENCL30GAS-NEXT: ret void
//
void test2_builtin_alloca_with_align_uninitialized(unsigned n) {
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 2dba7fb719376..f7641280715c8 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -146,12 +146,11 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.s.get.barrier.state(i32 [[TMP0]])
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[STATE_ASCAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[STATE]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4
// CHECK-NEXT: ret i32 [[TMP2]]
//
unsigned test_s_get_barrier_state(int a)
@@ -167,13 +166,12 @@ unsigned test_s_get_barrier_state(int a)
// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
-// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
-// CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(5) [[STATE]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[STATE]], align 4
// CHECK-NEXT: ret i32 [[TMP3]]
//
unsigned test_s_get_named_barrier_state(void *bar)
diff --git a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
index 86ca9ae509073..8845ffe499fde 100644
--- a/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
+++ b/clang/test/CodeGenOpenCL/implicit-addrspacecast-function-parameter.cl
@@ -32,11 +32,10 @@ __kernel void use_of_local_var()
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5:[0-9]+]]
-// CHECK-NEXT: store i32 0, ptr [[X_ASCAST]], align 4, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT: [[X_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[X_ASCAST]] to ptr addrspace(5)
-// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X_ASCAST_ASCAST]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT: store i32 0, ptr addrspace(5) [[X]], align 4, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT: call void @private_ptr(ptr addrspace(5) noundef [[X]]) #[[ATTR6:[0-9]+]]
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: call void @generic_ptr(ptr noundef [[X_ASCAST]]) #[[ATTR6]]
// CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[X]]) #[[ATTR5]]
// CHECK-NEXT: ret void
diff --git a/clang/test/Index/pipe-size.cl b/clang/test/Index/pipe-size.cl
index a48857baef1a6..f15bbefb68e7f 100644
--- a/clang/test/Index/pipe-size.cl
+++ b/clang/test/Index/pipe-size.cl
@@ -12,5 +12,5 @@ __kernel void testPipe( pipe int test )
// SPIR64: store target("spirv.Pipe", 0) %test, ptr %test.addr, align 8
// SPIR64: store i32 8, ptr %s, align 4
// AMDGCN: store ptr addrspace(1) %test, ptr %test{{.*}}, align 8
- // AMDGCN: store i32 8, ptr %s{{.*}}, align 4
+ // AMDGCN: store i32 8, ptr addrspace(5) %s{{.*}}, align 4
}
>From 334c1abdb0bee488477f810ebf4cc1d41c31e653 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 May 2025 07:57:13 +0200
Subject: [PATCH 096/115] Add regression tests from ConstantData uselist
removal (#138960)
Add some examples of failures observed after 87f312aad6ede636cd2de5d18f3058bf2caf5651 removed the use lists of ConstantData.
---
.../X86/codegen-no-uselist-constantdata.ll | 52 +++++++++++++++++++
.../no-uselist-constantdata-regression.ll | 30 +++++++++++
2 files changed, 82 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/codegen-no-uselist-constantdata.ll
create mode 100644 llvm/test/Transforms/CorrelatedValuePropagation/no-uselist-constantdata-regression.ll
diff --git a/llvm/test/CodeGen/X86/codegen-no-uselist-constantdata.ll b/llvm/test/CodeGen/X86/codegen-no-uselist-constantdata.ll
new file mode 100644
index 0000000000000..fef2c18b30a82
--- /dev/null
+++ b/llvm/test/CodeGen/X86/codegen-no-uselist-constantdata.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; Make sure codegen doesn't try to inspect the use list of constants
+
+; Make sure we do not try to make use of the uselist of a constant
+; null when looking for the alignment of the pointer.
+define <2 x i32> @no_uselist_null_isDereferenceableAndAlignedPointer(i1 %arg0, ptr align(4) %arg) {
+; CHECK-LABEL: no_uselist_null_isDereferenceableAndAlignedPointer:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: cmoveq %rsi, %rax
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: retq
+ %select.ptr = select i1 %arg0, ptr null, ptr %arg
+ %load = load i32, ptr %select.ptr
+ %insert = insertelement <2 x i32> zeroinitializer, i32 %load, i64 0
+ ret <2 x i32> %insert
+}
+
+; Make sure we do not try to inspect the uselist of a constant null
+; when processing a memcpy
+define void @gep_nullptr_no_inspect_uselist(ptr %arg) {
+; CHECK-LABEL: gep_nullptr_no_inspect_uselist:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzbl 16, %eax
+; CHECK-NEXT: movb %al, (%rdi)
+; CHECK-NEXT: retq
+ %null_gep = getelementptr i8, ptr null, i64 16
+ call void @llvm.memcpy.p0.p0.i64(ptr %arg, ptr %null_gep, i64 1, i1 false)
+ ret void
+}
+
+define <16 x i8> @load_null_offset() {
+; CHECK-LABEL: load_null_offset:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzbl 11, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: pslld $8, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; CHECK-NEXT: retq
+ %gep.null = getelementptr i8, ptr null, i64 11
+ %load = load i8, ptr %gep.null, align 1
+ %insert = insertelement <16 x i8> zeroinitializer, i8 %load, i64 1
+ ret <16 x i8> %insert
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #0
+
+attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/no-uselist-constantdata-regression.ll b/llvm/test/Transforms/CorrelatedValuePropagation/no-uselist-constantdata-regression.ll
new file mode 100644
index 0000000000000..51be78788c454
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/no-uselist-constantdata-regression.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=correlated-propagation < %s | FileCheck %s
+
+; Test for a regression after 87f312aad6e caused by trying to use the uselist of constantdata
+
+define ptr @_ZN4mlir6Region15getParentOfTypeINS_19FunctionOpInterfaceEEET_v() {
+; CHECK-LABEL: define ptr @_ZN4mlir6Region15getParentOfTypeINS_19FunctionOpInterfaceEEET_v() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CALL_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] zeroinitializer, 0
+; CHECK-NEXT: [[I2P:%.*]] = inttoptr i64 [[CALL_FCA_0_EXTRACT]] to ptr
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp ne ptr [[I2P]], null
+; CHECK-NEXT: br i1 [[CMP_I]], label %[[CLEANUP:.*]], label %[[DO_COND:.*]]
+; CHECK: [[DO_COND]]:
+; CHECK-NEXT: br label %[[CLEANUP]]
+; CHECK: [[CLEANUP]]:
+; CHECK-NEXT: ret ptr [[I2P]]
+;
+entry:
+ %call.fca.0.extract = extractvalue [2 x i64] zeroinitializer, 0
+ %i2p = inttoptr i64 %call.fca.0.extract to ptr
+ %cmp.i = icmp ne ptr %i2p, null
+ br i1 %cmp.i, label %cleanup, label %do.cond
+
+do.cond: ; preds = %entry
+ br label %cleanup
+
+cleanup: ; preds = %do.cond, %entry
+ %phi = phi ptr [ %i2p, %entry ], [ null, %do.cond ]
+ ret ptr %phi
+}
>From 9383fb23e18bb983d0024fb956a0a724ef9eb03d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 May 2025 08:00:09 +0200
Subject: [PATCH 097/115] Reapply "IR: Remove uselist for constantdata
(#137313)" (#138961)
Reapply "IR: Remove uselist for constantdata (#137313)"
This reverts commit 5936c02c8b9c6d1476f7830517781ce8b6e26e75.
Fix checking uselists of constants in assume bundle queries
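
For readers skimming the patch: the recurring fix (in AssumeBundleQueries, TypeMetadataUtils, CodeGenPrepare, ComplexDeinterleaving, SPIRVEmitIntrinsics, and friends) is to guard use-list walks behind the new Value::hasUseList() predicate, since ConstantData now only carries a use count. Below is a minimal sketch of that guard in client code; countInstructionUsers is a hypothetical helper used only for illustration and is not part of this patch.

// Minimal sketch of the guard pattern applied throughout this patch.
// countInstructionUsers is a hypothetical helper, not part of the patch.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

static unsigned countInstructionUsers(const Value *V) {
  // ConstantData (null, undef, ConstantInt, ...) no longer has a use list;
  // only an aggregate reference count is kept, so conservatively return 0
  // instead of iterating users().
  if (!V->hasUseList())
    return 0;

  unsigned N = 0;
  for (const User *U : V->users())
    if (isa<Instruction>(U))
      ++N;
  return N;
}

The same early-return shape appears below in getKnowledgeForValue, findLoadCallsAtConstantOffset and optimizeBranch.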
---
llvm/docs/ReleaseNotes.md | 2 +
llvm/include/llvm/IR/Constants.h | 2 +
llvm/include/llvm/IR/Use.h | 23 +---
llvm/include/llvm/IR/Value.h | 118 ++++++++++++++----
llvm/lib/Analysis/AssumeBundleQueries.cpp | 4 +
llvm/lib/Analysis/TypeMetadataUtils.cpp | 3 +
llvm/lib/AsmParser/LLParser.cpp | 2 +
llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 +
llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 3 +
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
llvm/lib/CodeGen/CodeGenPrepare.cpp | 3 +
.../lib/CodeGen/ComplexDeinterleavingPass.cpp | 3 +
llvm/lib/IR/AsmWriter.cpp | 9 +-
llvm/lib/IR/Instruction.cpp | 4 +-
llvm/lib/IR/Use.cpp | 8 +-
llvm/lib/IR/Value.cpp | 26 ++--
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 2 +-
llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 64 +++++-----
.../InstCombineSimplifyDemanded.cpp | 7 +-
.../InstCombine/InstructionCombining.cpp | 2 +-
llvm/lib/Transforms/Scalar/Reassociate.cpp | 3 +-
.../test/Analysis/MemorySSA/nondeterminism.ll | 1 -
.../llvm-diff/uselistorder-issue58629-gv.ll | 14 +++
.../llvm-diff/uselistorder-issue58629.ll | 5 +-
.../tools/llvm-reduce/bitcode-uselistorder.ll | 23 ++--
.../uselistorder-invalid-ir-output.ll | 6 +-
.../verify-uselistorder.cpp | 9 ++
polly/lib/Support/ScopHelper.cpp | 3 +
28 files changed, 251 insertions(+), 104 deletions(-)
create mode 100644 llvm/test/tools/llvm-diff/uselistorder-issue58629-gv.ll
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 0ed1675533d03..504db733308c1 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,6 +56,8 @@ Makes programs 10x faster by doing Special New Thing.
Changes to the LLVM IR
----------------------
+* It is no longer permitted to inspect the uses of ConstantData
+
* The `nocapture` attribute has been replaced by `captures(none)`.
* The constant expression variants of the following instructions have been
removed:
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 88d005d1adbb1..ff51f59b6ec68 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -50,6 +50,8 @@ template <class ConstantClass> struct ConstantAggrKeyType;
/// These constants have no operands; they represent their data directly.
/// Since they can be in use by unrelated modules (and are never based on
/// GlobalValues), it never makes sense to RAUW them.
+///
+/// These do not have use lists. It is illegal to inspect the uses.
class ConstantData : public Constant {
constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index a86b9c46c1f69..bcd1fd6677497 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -23,6 +23,7 @@
namespace llvm {
template <typename> struct simplify_type;
+class ConstantData;
class User;
class Value;
@@ -42,10 +43,7 @@ class Use {
private:
/// Destructor - Only for zap()
- ~Use() {
- if (Val)
- removeFromList();
- }
+ ~Use();
/// Constructor
Use(User *Parent) : Parent(Parent) {}
@@ -87,19 +85,10 @@ class Use {
Use **Prev = nullptr;
User *Parent = nullptr;
- void addToList(Use **List) {
- Next = *List;
- if (Next)
- Next->Prev = &Next;
- Prev = List;
- *Prev = this;
- }
-
- void removeFromList() {
- *Prev = Next;
- if (Next)
- Next->Prev = Prev;
- }
+ inline void addToList(unsigned &Count);
+ inline void addToList(Use *&List);
+ inline void removeFromList(unsigned &Count);
+ inline void removeFromList(Use *&List);
};
/// Allow clients to treat uses just like values when using
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index bf1de7eef9932..180b6238eda6c 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -116,7 +116,10 @@ class Value {
private:
Type *VTy;
- Use *UseList;
+ union {
+ Use *List = nullptr;
+ unsigned Count;
+ } Uses;
friend class ValueAsMetadata; // Allow access to IsUsedByMD.
friend class ValueHandleBase; // Allow access to HasValueHandle.
@@ -339,21 +342,28 @@ class Value {
#endif
}
+ /// Check if this Value has a use-list.
+ bool hasUseList() const { return !isa<ConstantData>(this); }
+
bool use_empty() const {
assertModuleIsMaterialized();
- return UseList == nullptr;
+ return hasUseList() ? Uses.List == nullptr : Uses.Count == 0;
}
bool materialized_use_empty() const {
- return UseList == nullptr;
+ return hasUseList() ? Uses.List == nullptr : !Uses.Count;
}
using use_iterator = use_iterator_impl<Use>;
using const_use_iterator = use_iterator_impl<const Use>;
- use_iterator materialized_use_begin() { return use_iterator(UseList); }
+ use_iterator materialized_use_begin() {
+ assert(hasUseList());
+ return use_iterator(Uses.List);
+ }
const_use_iterator materialized_use_begin() const {
- return const_use_iterator(UseList);
+ assert(hasUseList());
+ return const_use_iterator(Uses.List);
}
use_iterator use_begin() {
assertModuleIsMaterialized();
@@ -380,17 +390,18 @@ class Value {
return materialized_uses();
}
- bool user_empty() const {
- assertModuleIsMaterialized();
- return UseList == nullptr;
- }
+ bool user_empty() const { return use_empty(); }
using user_iterator = user_iterator_impl<User>;
using const_user_iterator = user_iterator_impl<const User>;
- user_iterator materialized_user_begin() { return user_iterator(UseList); }
+ user_iterator materialized_user_begin() {
+ assert(hasUseList());
+ return user_iterator(Uses.List);
+ }
const_user_iterator materialized_user_begin() const {
- return const_user_iterator(UseList);
+ assert(hasUseList());
+ return const_user_iterator(Uses.List);
}
user_iterator user_begin() {
assertModuleIsMaterialized();
@@ -429,7 +440,11 @@ class Value {
///
/// This is specialized because it is a common request and does not require
/// traversing the whole use list.
- bool hasOneUse() const { return hasSingleElement(uses()); }
+ bool hasOneUse() const {
+ if (!hasUseList())
+ return Uses.Count == 1;
+ return hasSingleElement(uses());
+ }
/// Return true if this Value has exactly N uses.
bool hasNUses(unsigned N) const;
@@ -491,6 +506,8 @@ class Value {
static void dropDroppableUse(Use &U);
/// Check if this value is used in the specified basic block.
+ ///
+ /// Not supported for ConstantData.
bool isUsedInBasicBlock(const BasicBlock *BB) const;
/// This method computes the number of uses of this Value.
@@ -500,7 +517,19 @@ class Value {
unsigned getNumUses() const;
/// This method should only be used by the Use class.
- void addUse(Use &U) { U.addToList(&UseList); }
+ void addUse(Use &U) {
+ if (hasUseList())
+ U.addToList(Uses.List);
+ else
+ U.addToList(Uses.Count);
+ }
+
+ void removeUse(Use &U) {
+ if (hasUseList())
+ U.removeFromList(Uses.List);
+ else
+ U.removeFromList(Uses.Count);
+ }
/// Concrete subclass of this.
///
@@ -841,7 +870,8 @@ class Value {
///
/// \return the first element in the list.
///
- /// \note Completely ignores \a Use::Prev (doesn't read, doesn't update).
+ /// \note Completely ignores \a Use::PrevOrCount (doesn't read, doesn't
+ /// update).
template <class Compare>
static Use *mergeUseLists(Use *L, Use *R, Compare Cmp) {
Use *Merged;
@@ -887,10 +917,50 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
return OS;
}
+inline Use::~Use() {
+ if (Val)
+ Val->removeUse(*this);
+}
+
+void Use::addToList(unsigned &Count) {
+ assert(isa<ConstantData>(Val) && "Only ConstantData is ref-counted");
+ ++Count;
+
+ // We don't have a uselist - clear the remnant if we are replacing a
+ // non-constant value.
+ Prev = nullptr;
+ Next = nullptr;
+}
+
+void Use::addToList(Use *&List) {
+ assert(!isa<ConstantData>(Val) && "ConstantData has no use-list");
+
+ Next = List;
+ if (Next)
+ Next->Prev = &Next;
+ Prev = &List;
+ List = this;
+}
+
+void Use::removeFromList(unsigned &Count) {
+ assert(isa<ConstantData>(Val));
+ assert(Count > 0 && "reference count underflow");
+ assert(!Prev && !Next && "should not have uselist remnant");
+ --Count;
+}
+
+void Use::removeFromList(Use *&List) {
+ *Prev = Next;
+ if (Next)
+ Next->Prev = Prev;
+}
+
void Use::set(Value *V) {
- if (Val) removeFromList();
+ if (Val)
+ Val->removeUse(*this);
Val = V;
- if (V) V->addUse(*this);
+ if (V)
+ V->addUse(*this);
}
Value *Use::operator=(Value *RHS) {
@@ -904,7 +974,7 @@ const Use &Use::operator=(const Use &RHS) {
}
template <class Compare> void Value::sortUseList(Compare Cmp) {
- if (!UseList || !UseList->Next)
+ if (!hasUseList() || !Uses.List || !Uses.List->Next)
// No need to sort 0 or 1 uses.
return;
@@ -917,10 +987,10 @@ template <class Compare> void Value::sortUseList(Compare Cmp) {
Use *Slots[MaxSlots];
// Collect the first use, turning it into a single-item list.
- Use *Next = UseList->Next;
- UseList->Next = nullptr;
+ Use *Next = Uses.List->Next;
+ Uses.List->Next = nullptr;
unsigned NumSlots = 1;
- Slots[0] = UseList;
+ Slots[0] = Uses.List;
// Collect all but the last use.
while (Next->Next) {
@@ -956,15 +1026,15 @@ template <class Compare> void Value::sortUseList(Compare Cmp) {
// Merge all the lists together.
assert(Next && "Expected one more Use");
assert(!Next->Next && "Expected only one Use");
- UseList = Next;
+ Uses.List = Next;
for (unsigned I = 0; I < NumSlots; ++I)
if (Slots[I])
- // Since the uses in Slots[I] originally preceded those in UseList, send
+ // Since the uses in Slots[I] originally preceded those in Uses.List, send
// Slots[I] in as the left parameter to maintain a stable sort.
- UseList = mergeUseLists(Slots[I], UseList, Cmp);
+ Uses.List = mergeUseLists(Slots[I], Uses.List, Cmp);
// Fix the Prev pointers.
- for (Use *I = UseList, **Prev = &UseList; I; I = I->Next) {
+ for (Use *I = Uses.List, **Prev = &Uses.List; I; I = I->Next) {
I->Prev = Prev;
Prev = &I->Next;
}
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index c27bfa6f3cc2c..b37b2270bbec5 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -180,6 +180,10 @@ llvm::getKnowledgeForValue(const Value *V,
}
return RetainedKnowledge::none();
}
+
+ if (!V->hasUseList())
+ return RetainedKnowledge::none();
+
for (const auto &U : V->uses()) {
CallInst::BundleOpInfo* Bundle = getBundleFromUse(&U);
if (!Bundle)
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 9ec0785eb5034..8099fbc3daeda 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -54,6 +54,9 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
static void findLoadCallsAtConstantOffset(
const Module *M, SmallVectorImpl<DevirtCallSite> &DevirtCalls, Value *VPtr,
int64_t Offset, const CallInst *CI, DominatorTree &DT) {
+ if (!VPtr->hasUseList())
+ return;
+
for (const Use &U : VPtr->uses()) {
Value *User = U.getUser();
if (isa<BitCastInst>(User)) {
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index fc7f4601331df..96f86eb52f15c 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8869,6 +8869,8 @@ bool LLParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
//===----------------------------------------------------------------------===//
bool LLParser::sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes,
SMLoc Loc) {
+ if (!V->hasUseList())
+ return false;
if (V->use_empty())
return error(Loc, "value has no uses");
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index b533731e8dda3..de7e9bbe69bd4 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3859,6 +3859,10 @@ Error BitcodeReader::parseUseLists() {
V = FunctionBBs[ID];
} else
V = ValueList[ID];
+
+ if (!V->hasUseList())
+ break;
+
unsigned NumUses = 0;
SmallDenseMap<const Use *, unsigned, 16> Order;
for (const Use &U : V->materialized_uses()) {
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9f735f77d29dc..1fdb8080eab0a 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -230,6 +230,9 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
static void predictValueUseListOrder(const Value *V, const Function *F,
OrderMap &OM, UseListOrderStack &Stack) {
+ if (!V->hasUseList())
+ return;
+
auto &IDPair = OM[V];
assert(IDPair.first && "Unmapped value");
if (IDPair.second)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index eb076960a5def..889e24a3f70ad 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -4004,7 +4004,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
// Globals with sub-elements such as combinations of arrays and structs
// are handled recursively by emitGlobalConstantImpl. Keep track of the
// constant symbol base and the current position with BaseCV and Offset.
- if (!BaseCV && CV->hasOneUse())
+ if (!isa<ConstantData>(CV) && !BaseCV && CV->hasOneUse())
BaseCV = dyn_cast<Constant>(CV->user_back());
if (isa<ConstantAggregateZero>(CV)) {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f9dcb472ed1d2..2c53a9c27ccb2 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -8591,6 +8591,9 @@ static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
return false;
Value *X = Cmp->getOperand(0);
+ if (!X->hasUseList())
+ return false;
+
APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
for (auto *U : X->users()) {
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index f4fe0b3970d4c..90c6c28c3c706 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -1034,6 +1034,9 @@ ComplexDeinterleavingGraph::identifyPartialReduction(Value *R, Value *I) {
if (!isa<VectorType>(R->getType()) || !isa<VectorType>(I->getType()))
return nullptr;
+ if (!R->hasUseList() || !I->hasUseList())
+ return nullptr;
+
auto CommonUser =
findCommonBetweenCollections<Value *>(R->users(), I->users());
if (!CommonUser)
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 12edf6fcd510c..610cbcb1a9b6b 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -125,11 +125,15 @@ static void orderValue(const Value *V, OrderMap &OM) {
if (OM.lookup(V))
return;
- if (const Constant *C = dyn_cast<Constant>(V))
+ if (const Constant *C = dyn_cast<Constant>(V)) {
+ if (isa<ConstantData>(C))
+ return;
+
if (C->getNumOperands() && !isa<GlobalValue>(C))
for (const Value *Op : C->operands())
if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
orderValue(Op, OM);
+ }
// Note: we cannot cache this lookup above, since inserting into the map
// changes the map's size, and thus affects the other IDs.
@@ -275,7 +279,8 @@ static UseListOrderMap predictUseListOrder(const Module *M) {
UseListOrderMap ULOM;
for (const auto &Pair : OM) {
const Value *V = Pair.first;
- if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
+ if (!V->hasUseList() || V->use_empty() ||
+ std::next(V->use_begin()) == V->use_end())
continue;
std::vector<unsigned> Shuffle =
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 6f858110fb8ce..258681382f9e5 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -373,7 +373,9 @@ std::optional<BasicBlock::iterator> Instruction::getInsertionPointAfterDef() {
}
bool Instruction::isOnlyUserOfAnyOperand() {
- return any_of(operands(), [](Value *V) { return V->hasOneUser(); });
+ return any_of(operands(), [](const Value *V) {
+ return V->hasUseList() && V->hasOneUser();
+ });
}
void Instruction::setHasNoUnsignedWrap(bool b) {
diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp
index 99a89386d75f9..67882ba0144b4 100644
--- a/llvm/lib/IR/Use.cpp
+++ b/llvm/lib/IR/Use.cpp
@@ -19,11 +19,15 @@ void Use::swap(Use &RHS) {
std::swap(Next, RHS.Next);
std::swap(Prev, RHS.Prev);
- *Prev = this;
+ if (Prev)
+ *Prev = this;
+
if (Next)
Next->Prev = &Next;
- *RHS.Prev = &RHS;
+ if (RHS.Prev)
+ *RHS.Prev = &RHS;
+
if (RHS.Next)
RHS.Next->Prev = &RHS.Next;
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index aa97b70f21aeb..74a96051f33af 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -53,7 +53,7 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
: SubclassID(scid), HasValueHandle(0), SubclassOptionalData(0),
SubclassData(0), NumUserOperands(0), IsUsedByMD(false), HasName(false),
- HasMetadata(false), VTy(checkType(ty)), UseList(nullptr) {
+ HasMetadata(false), VTy(checkType(ty)) {
static_assert(ConstantFirstVal == 0, "!(SubclassID < ConstantFirstVal)");
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
@@ -148,10 +148,14 @@ void Value::destroyValueName() {
}
bool Value::hasNUses(unsigned N) const {
+ if (!hasUseList())
+ return Uses.Count == N;
return hasNItems(use_begin(), use_end(), N);
}
bool Value::hasNUsesOrMore(unsigned N) const {
+ if (!hasUseList())
+ return Uses.Count >= N;
return hasNItemsOrMore(use_begin(), use_end(), N);
}
@@ -232,6 +236,8 @@ void Value::dropDroppableUse(Use &U) {
}
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ assert(hasUseList() && "ConstantData has no use-list");
+
// This can be computed either by scanning the instructions in BB, or by
// scanning the use list of this Value. Both lists can be very long, but
// usually one is quite short.
@@ -253,6 +259,9 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
}
unsigned Value::getNumUses() const {
+ if (!hasUseList())
+ return Uses.Count;
+
return (unsigned)std::distance(use_begin(), use_end());
}
@@ -499,6 +508,7 @@ static bool contains(Value *Expr, Value *V) {
#endif // NDEBUG
void Value::doRAUW(Value *New, ReplaceMetadataUses ReplaceMetaUses) {
+ assert(hasUseList() && "Cannot replace constant data");
assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
assert(!contains(New, this) &&
"this->replaceAllUsesWith(expr(this)) is NOT valid!");
@@ -512,7 +522,7 @@ void Value::doRAUW(Value *New, ReplaceMetadataUses ReplaceMetaUses) {
ValueAsMetadata::handleRAUW(this, New);
while (!materialized_use_empty()) {
- Use &U = *UseList;
+ Use &U = *Uses.List;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
// constant because they are uniqued.
if (auto *C = dyn_cast<Constant>(U.getUser())) {
@@ -844,7 +854,7 @@ bool Value::canBeFreed() const {
// which is why we need the explicit opt in on a per collector basis.
if (!F->hasGC())
return true;
-
+
const auto &GCName = F->getGC();
if (GCName == "statepoint-example") {
auto *PT = cast<PointerType>(this->getType());
@@ -1092,12 +1102,12 @@ const Value *Value::DoPHITranslation(const BasicBlock *CurBB,
LLVMContext &Value::getContext() const { return VTy->getContext(); }
void Value::reverseUseList() {
- if (!UseList || !UseList->Next)
+ if (!Uses.List || !Uses.List->Next || !hasUseList())
// No need to reverse 0 or 1 uses.
return;
- Use *Head = UseList;
- Use *Current = UseList->Next;
+ Use *Head = Uses.List;
+ Use *Current = Uses.List->Next;
Head->Next = nullptr;
while (Current) {
Use *Next = Current->Next;
@@ -1106,8 +1116,8 @@ void Value::reverseUseList() {
Head = Current;
Current = Next;
}
- UseList = Head;
- Head->Prev = &UseList;
+ Uses.List = Head;
+ Head->Prev = &Uses.List;
}
bool Value::isSwiftError() const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 8d83fef265e6f..6bd3fd182485d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -633,7 +633,7 @@ bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
// Look at the first element of the array to determine its type
if (isa<ArrayType>(EltTy))
EltTy = EltTy->getArrayElementType();
- } else {
+ } else if (!isa<Constant>(LdVal)) {
// FIXME: grubbing around uses is pretty ugly, but with no more
// `getPointerElementType` there's not much else we can do.
for (const auto *LdUser : LdVal->users()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 4325023406c7c..22fc1ca2c4c2d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1124,7 +1124,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(
continue;
Value *OpTyVal = getNormalizedPoisonValue(KnownElemTy);
Type *OpTy = Op->getType();
- if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) {
+ if (Op->hasUseList() &&
+ (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op))) {
Type *PrevElemTy = GR->findDeducedElementType(Op);
GR->addDeducedElementType(Op, normalizeType(KnownElemTy));
// check if KnownElemTy is complete
@@ -1474,34 +1475,36 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
// Do not emit new spv_ptrcast if equivalent one already exists or when
// spv_assign_ptr_type already targets this pointer with the same element
// type.
- for (auto User : Pointer->users()) {
- auto *II = dyn_cast<IntrinsicInst>(User);
- if (!II ||
- (II->getIntrinsicID() != Intrinsic::spv_assign_ptr_type &&
- II->getIntrinsicID() != Intrinsic::spv_ptrcast) ||
- II->getOperand(0) != Pointer)
- continue;
+ if (Pointer->hasUseList()) {
+ for (auto User : Pointer->users()) {
+ auto *II = dyn_cast<IntrinsicInst>(User);
+ if (!II ||
+ (II->getIntrinsicID() != Intrinsic::spv_assign_ptr_type &&
+ II->getIntrinsicID() != Intrinsic::spv_ptrcast) ||
+ II->getOperand(0) != Pointer)
+ continue;
- // There is some spv_ptrcast/spv_assign_ptr_type already targeting this
- // pointer.
- FirstPtrCastOrAssignPtrType = false;
- if (II->getOperand(1) != VMD ||
- dyn_cast<ConstantInt>(II->getOperand(2))->getSExtValue() !=
- AddressSpace)
- continue;
+ // There is some spv_ptrcast/spv_assign_ptr_type already targeting this
+ // pointer.
+ FirstPtrCastOrAssignPtrType = false;
+ if (II->getOperand(1) != VMD ||
+ dyn_cast<ConstantInt>(II->getOperand(2))->getSExtValue() !=
+ AddressSpace)
+ continue;
- // The spv_ptrcast/spv_assign_ptr_type targeting this pointer is of the same
- // element type and address space.
- if (II->getIntrinsicID() != Intrinsic::spv_ptrcast)
- return;
+ // The spv_ptrcast/spv_assign_ptr_type targeting this pointer is of the
+ // same element type and address space.
+ if (II->getIntrinsicID() != Intrinsic::spv_ptrcast)
+ return;
- // This must be a spv_ptrcast, do not emit new if this one has the same BB
- // as I. Otherwise, search for other spv_ptrcast/spv_assign_ptr_type.
- if (II->getParent() != I->getParent())
- continue;
+ // This must be a spv_ptrcast, do not emit new if this one has the same BB
+ // as I. Otherwise, search for other spv_ptrcast/spv_assign_ptr_type.
+ if (II->getParent() != I->getParent())
+ continue;
- I->setOperand(OperandToReplace, II);
- return;
+ I->setOperand(OperandToReplace, II);
+ return;
+ }
}
if (isa<Instruction>(Pointer) || isa<Argument>(Pointer)) {
@@ -2490,10 +2493,13 @@ bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) {
}
}
}
- for (User *U : Op->users()) {
- Instruction *Inst = dyn_cast<Instruction>(U);
- if (Inst && !isa<IntrinsicInst>(Inst))
- ToProcess[Inst].insert(Op);
+
+ if (Op->hasUseList()) {
+ for (User *U : Op->users()) {
+ Instruction *Inst = dyn_cast<Instruction>(U);
+ if (Inst && !isa<IntrinsicInst>(Inst))
+ ToProcess[Inst].insert(Op);
+ }
}
}
if (TodoTypeSz == 0)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a48854a191cae..b5c1ee0e01cd0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1894,9 +1894,14 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Try to use shuffle-of-operand in place of an operand:
// bo X, Y --> bo (shuf X), Y
// bo X, Y --> bo X, (shuf Y)
+
+ Value *OtherOp = MatchShufAsOp0 ? Y : X;
+ if (!OtherOp->hasUseList())
+ return nullptr;
+
BinaryOperator::BinaryOps Opcode = BO->getOpcode();
Value *ShufOp = MatchShufAsOp0 ? X : Y;
- Value *OtherOp = MatchShufAsOp0 ? Y : X;
+
for (User *U : OtherOp->users()) {
ArrayRef<int> Mask;
auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_Mask(Mask));
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d30c609b08a82..206d41e30db2c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1831,7 +1831,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
// Handle some cases that can't be fully simplified, but where we know that
// the two instructions will fold into one.
auto WillFold = [&]() {
- if (!InVal->hasOneUser())
+ if (!InVal->hasUseList() || !InVal->hasOneUser())
return false;
// icmp of ucmp/scmp with constant will fold to icmp.
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 97f9829ce54c5..cb7a9ef9b6711 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -439,7 +439,8 @@ static bool LinearizeExprTree(Instruction *I,
for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
Value *Op = I->getOperand(OpIdx);
LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
- assert(!Op->use_empty() && "No uses, so how did we get to it?!");
+ assert((!Op->hasUseList() || !Op->use_empty()) &&
+ "No uses, so how did we get to it?!");
// If this is a binary operation of the right kind with only one use then
// add its operands to the expression.
diff --git a/llvm/test/Analysis/MemorySSA/nondeterminism.ll b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
index 90902e36b5d58..11b9703cd0dd4 100644
--- a/llvm/test/Analysis/MemorySSA/nondeterminism.ll
+++ b/llvm/test/Analysis/MemorySSA/nondeterminism.ll
@@ -1,7 +1,6 @@
; RUN: opt -passes=simplifycfg -S --preserve-ll-uselistorder %s | FileCheck %s
; REQUIRES: x86-registered-target
; CHECK-LABEL: @n
-; CHECK: uselistorder i16 0, { 3, 2, 4, 1, 5, 0, 6 }
; Note: test was added in an effort to ensure determinism when updating memoryssa. See PR42574.
; If the uselistorder check becomes no longer relevant, the test can be disabled or removed.
diff --git a/llvm/test/tools/llvm-diff/uselistorder-issue58629-gv.ll b/llvm/test/tools/llvm-diff/uselistorder-issue58629-gv.ll
new file mode 100644
index 0000000000000..33216bb8d914e
--- /dev/null
+++ b/llvm/test/tools/llvm-diff/uselistorder-issue58629-gv.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-diff %s %s | count 0
+; Make sure there is no error produced by using uselistorder with two
+; modules using the same constant/global in the same context.
+
+@gv = addrspace(4) global [2 x i64] zeroinitializer, align 16
+
+define void @func() {
+entry:
+ %gep0 = getelementptr inbounds i8, ptr addrspace(4) @gv, i64 12
+ %gep1 = getelementptr i8, ptr addrspace(4) @gv, i64 4
+ ret void
+}
+
+uselistorder ptr addrspace(4) @gv, { 1, 0 }
diff --git a/llvm/test/tools/llvm-diff/uselistorder-issue58629.ll b/llvm/test/tools/llvm-diff/uselistorder-issue58629.ll
index e89fc7a3ea100..d50b0dcb7972d 100644
--- a/llvm/test/tools/llvm-diff/uselistorder-issue58629.ll
+++ b/llvm/test/tools/llvm-diff/uselistorder-issue58629.ll
@@ -1,5 +1,6 @@
-; XFAIL: *
-; RUN: llvm-diff %s %s
+; RUN: llvm-diff %s %s | count 0
+; Make sure there is no error produced by using uselistorder with two
+; modules using the same constant in the same context.
define void @func() {
entry:
diff --git a/llvm/test/tools/llvm-reduce/bitcode-uselistorder.ll b/llvm/test/tools/llvm-reduce/bitcode-uselistorder.ll
index ac98d75ef2d3b..4e8d1cf746441 100644
--- a/llvm/test/tools/llvm-reduce/bitcode-uselistorder.ll
+++ b/llvm/test/tools/llvm-reduce/bitcode-uselistorder.ll
@@ -11,20 +11,21 @@
; RUN: FileCheck -check-prefix=RESULT %s < %t.reduced.ll
+@gv = external global i32, align 4
-; INTERESTING: add
-; INTERESTING: add
-; INTERESTING: add
-define i32 @func(i32 %arg0, i32 %arg1) {
+; INTERESTING: getelementptr
+; INTERESTING: getelementptr
+; INTERESTING: getelementptr
+define ptr @func(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) {
entry:
- %add0 = add i32 %arg0, 0
- %add1 = add i32 %add0, 0
- %add2 = add i32 %add1, 0
- %add3 = add i32 %arg1, 0
- %add4 = add i32 %add2, %add3
- ret i32 %add4
+ %add0 = getelementptr i8, ptr @gv, i32 %arg0
+ %add1 = getelementptr i8, ptr @gv, i32 %arg1
+ %add2 = getelementptr i8, ptr @gv, i32 %arg2
+ %add3 = getelementptr i8, ptr @gv, i32 %arg3
+ %add4 = getelementptr i8, ptr @gv, i32 %arg4
+ ret ptr %add4
}
; INTERESTING: uselistorder
; RESULT: uselistorder
-uselistorder i32 0, { 3, 2, 1, 0 }
+uselistorder ptr @gv, { 3, 2, 4, 1, 0 }
diff --git a/llvm/test/tools/llvm-reduce/uselistorder-invalid-ir-output.ll b/llvm/test/tools/llvm-reduce/uselistorder-invalid-ir-output.ll
index 4bc862bdaed26..0e9c32120f763 100644
--- a/llvm/test/tools/llvm-reduce/uselistorder-invalid-ir-output.ll
+++ b/llvm/test/tools/llvm-reduce/uselistorder-invalid-ir-output.ll
@@ -7,10 +7,11 @@
; RUN: --test-arg %s
; Check if the final output really parses
-; RUN: not llvm-as -o /dev/null %t.reduced.ll
+; RUN: llvm-as -o /dev/null %t.reduced.ll
; RUN: FileCheck --check-prefix=RESULT %s < %t.reduced.ll
+; RESULT-LABEL: define void @kernel_ocl_path_trace_direct_lighting(
define void @kernel_ocl_path_trace_direct_lighting(i1 %cond.i, i1 %cmp5.i.i, i32 %arg) {
; INTERESTING: entry:
; INTERESTING: 0
@@ -48,4 +49,5 @@ kernel_direct_lighting.exit:
ret void
}
-; RESULT: uselistorder i32 0, { 4, 0, 5, 1, 6, 2, 7, 3 }
+; FIXME: Should probably fix test to use a global address
+; RESULT-NOT: uselistorder
diff --git a/llvm/tools/verify-uselistorder/verify-uselistorder.cpp b/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
index c2810b9579c15..be8ab4738d0eb 100644
--- a/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
+++ b/llvm/tools/verify-uselistorder/verify-uselistorder.cpp
@@ -245,6 +245,9 @@ ValueMapping::ValueMapping(const Module &M) {
}
void ValueMapping::map(const Value *V) {
+ if (!V->hasUseList())
+ return;
+
if (IDs.lookup(V))
return;
@@ -395,6 +398,9 @@ static void verifyUseListOrder(const Module &M) {
static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen,
DenseSet<Value *> &Seen) {
+ if (!V->hasUseList())
+ return;
+
if (!Seen.insert(V).second)
return;
@@ -437,6 +443,9 @@ static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen,
}
static void reverseValueUseLists(Value *V, DenseSet<Value *> &Seen) {
+ if (!V->hasUseList())
+ return;
+
if (!Seen.insert(V).second)
return;
diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp
index 73c5d95deef58..a2328d1bbb3cf 100644
--- a/polly/lib/Support/ScopHelper.cpp
+++ b/polly/lib/Support/ScopHelper.cpp
@@ -601,6 +601,9 @@ bool polly::isHoistableLoad(LoadInst *LInst, Region &R, LoopInfo &LI,
L = L->getParentLoop();
}
+ if (!Ptr->hasUseList())
+ return true;
+
for (auto *User : Ptr->users()) {
auto *UserI = dyn_cast<Instruction>(User);
if (!UserI || UserI->getFunction() != LInst->getFunction() ||
>From 4d60c6d9b2c863d773aac9b59af8780e5ba23fcd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 May 2025 08:02:54 +0200
Subject: [PATCH 098/115] Reapply "IR: Remove reference counts from
ConstantData (#137314)" (#138962)
This reverts commit 0274232b87177779e5c985eca06df22bf140f6cb.
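(Not part of the patch, for orientation only: a minimal sketch of the use-count behaviour this gives ConstantData, mirroring the unit test added in llvm/unittests/IR/ConstantsTest.cpp; names here are illustrative.)
```
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <cassert>
using namespace llvm;

static void constantDataUseCountSketch() {
  LLVMContext Ctx;
  Module M("sketch", Ctx);
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  Constant *Zero = ConstantInt::get(Int32Ty, 0); // a ConstantData subclass

  // Reference the constant from a global initializer.
  new GlobalVariable(M, Int32Ty, /*isConstant=*/false,
                     GlobalValue::ExternalLinkage, /*Initializer=*/Zero, "g");

  // Despite the use above, the counting APIs report "no uses".
  assert(Zero->use_empty());
  assert(Zero->getNumUses() == 0);
  assert(!Zero->hasOneUse());
}
```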
---
llvm/docs/ReleaseNotes.md | 4 +-
llvm/include/llvm/IR/Constants.h | 3 +-
llvm/include/llvm/IR/Use.h | 26 ++++--
llvm/include/llvm/IR/Value.h | 98 +++++-----------------
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
llvm/lib/IR/AsmWriter.cpp | 3 +-
llvm/lib/IR/Instruction.cpp | 4 +-
llvm/lib/IR/Value.cpp | 30 ++++---
llvm/unittests/IR/ConstantsTest.cpp | 38 +++++++++
9 files changed, 103 insertions(+), 105 deletions(-)
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 504db733308c1..05318362b99c9 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -56,7 +56,9 @@ Makes programs 10x faster by doing Special New Thing.
Changes to the LLVM IR
----------------------
-* It is no longer permitted to inspect the uses of ConstantData
+* It is no longer permitted to inspect the uses of ConstantData. Use
+ count APIs will behave as if they have no uses (i.e. use_empty() is
+ always true).
* The `nocapture` attribute has been replaced by `captures(none)`.
* The constant expression variants of the following instructions have been
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index ff51f59b6ec68..76efa9bd63522 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -51,7 +51,8 @@ template <class ConstantClass> struct ConstantAggrKeyType;
/// Since they can be in use by unrelated modules (and are never based on
/// GlobalValues), it never makes sense to RAUW them.
///
-/// These do not have use lists. It is illegal to inspect the uses.
+/// These do not have use lists. It is illegal to inspect the uses. These behave
+/// as if they have no uses (i.e. use_empty() is always true).
class ConstantData : public Constant {
constexpr static IntrusiveOperandsAllocMarker AllocMarker{0};
diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h
index bcd1fd6677497..0d5d878e4689f 100644
--- a/llvm/include/llvm/IR/Use.h
+++ b/llvm/include/llvm/IR/Use.h
@@ -23,7 +23,6 @@
namespace llvm {
template <typename> struct simplify_type;
-class ConstantData;
class User;
class Value;
@@ -43,7 +42,7 @@ class Use {
private:
/// Destructor - Only for zap()
- ~Use();
+ ~Use() { removeFromList(); }
/// Constructor
Use(User *Parent) : Parent(Parent) {}
@@ -85,10 +84,25 @@ class Use {
Use **Prev = nullptr;
User *Parent = nullptr;
- inline void addToList(unsigned &Count);
- inline void addToList(Use *&List);
- inline void removeFromList(unsigned &Count);
- inline void removeFromList(Use *&List);
+ void addToList(Use **List) {
+ Next = *List;
+ if (Next)
+ Next->Prev = &Next;
+ Prev = List;
+ *Prev = this;
+ }
+
+ void removeFromList() {
+ if (Prev) {
+ *Prev = Next;
+ if (Next) {
+ Next->Prev = Prev;
+ Next = nullptr;
+ }
+
+ Prev = nullptr;
+ }
+ }
};
/// Allow clients to treat uses just like values when using
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 180b6238eda6c..241b9e2860c4c 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -116,10 +116,7 @@ class Value {
private:
Type *VTy;
- union {
- Use *List = nullptr;
- unsigned Count;
- } Uses;
+ Use *UseList = nullptr;
friend class ValueAsMetadata; // Allow access to IsUsedByMD.
friend class ValueHandleBase; // Allow access to HasValueHandle.
@@ -347,23 +344,21 @@ class Value {
bool use_empty() const {
assertModuleIsMaterialized();
- return hasUseList() ? Uses.List == nullptr : Uses.Count == 0;
+ return UseList == nullptr;
}
- bool materialized_use_empty() const {
- return hasUseList() ? Uses.List == nullptr : !Uses.Count;
- }
+ bool materialized_use_empty() const { return UseList == nullptr; }
using use_iterator = use_iterator_impl<Use>;
using const_use_iterator = use_iterator_impl<const Use>;
use_iterator materialized_use_begin() {
assert(hasUseList());
- return use_iterator(Uses.List);
+ return use_iterator(UseList);
}
const_use_iterator materialized_use_begin() const {
assert(hasUseList());
- return const_use_iterator(Uses.List);
+ return const_use_iterator(UseList);
}
use_iterator use_begin() {
assertModuleIsMaterialized();
@@ -397,11 +392,11 @@ class Value {
user_iterator materialized_user_begin() {
assert(hasUseList());
- return user_iterator(Uses.List);
+ return user_iterator(UseList);
}
const_user_iterator materialized_user_begin() const {
assert(hasUseList());
- return const_user_iterator(Uses.List);
+ return const_user_iterator(UseList);
}
user_iterator user_begin() {
assertModuleIsMaterialized();
@@ -440,11 +435,7 @@ class Value {
///
/// This is specialized because it is a common request and does not require
/// traversing the whole use list.
- bool hasOneUse() const {
- if (!hasUseList())
- return Uses.Count == 1;
- return hasSingleElement(uses());
- }
+ bool hasOneUse() const { return UseList && hasSingleElement(uses()); }
/// Return true if this Value has exactly N uses.
bool hasNUses(unsigned N) const;
@@ -518,17 +509,8 @@ class Value {
/// This method should only be used by the Use class.
void addUse(Use &U) {
- if (hasUseList())
- U.addToList(Uses.List);
- else
- U.addToList(Uses.Count);
- }
-
- void removeUse(Use &U) {
- if (hasUseList())
- U.removeFromList(Uses.List);
- else
- U.removeFromList(Uses.Count);
+ if (UseList || hasUseList())
+ U.addToList(&UseList);
}
/// Concrete subclass of this.
@@ -870,8 +852,7 @@ class Value {
///
/// \return the first element in the list.
///
- /// \note Completely ignores \a Use::PrevOrCount (doesn't read, doesn't
- /// update).
+ /// \note Completely ignores \a Use::Prev (doesn't read, doesn't update).
template <class Compare>
static Use *mergeUseLists(Use *L, Use *R, Compare Cmp) {
Use *Merged;
@@ -917,47 +898,8 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
return OS;
}
-inline Use::~Use() {
- if (Val)
- Val->removeUse(*this);
-}
-
-void Use::addToList(unsigned &Count) {
- assert(isa<ConstantData>(Val) && "Only ConstantData is ref-counted");
- ++Count;
-
- // We don't have a uselist - clear the remnant if we are replacing a
- // non-constant value.
- Prev = nullptr;
- Next = nullptr;
-}
-
-void Use::addToList(Use *&List) {
- assert(!isa<ConstantData>(Val) && "ConstantData has no use-list");
-
- Next = List;
- if (Next)
- Next->Prev = &Next;
- Prev = &List;
- List = this;
-}
-
-void Use::removeFromList(unsigned &Count) {
- assert(isa<ConstantData>(Val));
- assert(Count > 0 && "reference count underflow");
- assert(!Prev && !Next && "should not have uselist remnant");
- --Count;
-}
-
-void Use::removeFromList(Use *&List) {
- *Prev = Next;
- if (Next)
- Next->Prev = Prev;
-}
-
void Use::set(Value *V) {
- if (Val)
- Val->removeUse(*this);
+ removeFromList();
Val = V;
if (V)
V->addUse(*this);
@@ -974,7 +916,7 @@ const Use &Use::operator=(const Use &RHS) {
}
template <class Compare> void Value::sortUseList(Compare Cmp) {
- if (!hasUseList() || !Uses.List || !Uses.List->Next)
+ if (!UseList || !UseList->Next)
// No need to sort 0 or 1 uses.
return;
@@ -987,10 +929,10 @@ template <class Compare> void Value::sortUseList(Compare Cmp) {
Use *Slots[MaxSlots];
// Collect the first use, turning it into a single-item list.
- Use *Next = Uses.List->Next;
- Uses.List->Next = nullptr;
+ Use *Next = UseList->Next;
+ UseList->Next = nullptr;
unsigned NumSlots = 1;
- Slots[0] = Uses.List;
+ Slots[0] = UseList;
// Collect all but the last use.
while (Next->Next) {
@@ -1026,15 +968,15 @@ template <class Compare> void Value::sortUseList(Compare Cmp) {
// Merge all the lists together.
assert(Next && "Expected one more Use");
assert(!Next->Next && "Expected only one Use");
- Uses.List = Next;
+ UseList = Next;
for (unsigned I = 0; I < NumSlots; ++I)
if (Slots[I])
- // Since the uses in Slots[I] originally preceded those in Uses.List, send
+ // Since the uses in Slots[I] originally preceded those in UseList, send
// Slots[I] in as the left parameter to maintain a stable sort.
- Uses.List = mergeUseLists(Slots[I], Uses.List, Cmp);
+ UseList = mergeUseLists(Slots[I], UseList, Cmp);
// Fix the Prev pointers.
- for (Use *I = Uses.List, **Prev = &Uses.List; I; I = I->Next) {
+ for (Use *I = UseList, **Prev = &UseList; I; I = I->Next) {
I->Prev = Prev;
Prev = &I->Next;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 889e24a3f70ad..eb076960a5def 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -4004,7 +4004,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
// Globals with sub-elements such as combinations of arrays and structs
// are handled recursively by emitGlobalConstantImpl. Keep track of the
// constant symbol base and the current position with BaseCV and Offset.
- if (!isa<ConstantData>(CV) && !BaseCV && CV->hasOneUse())
+ if (!BaseCV && CV->hasOneUse())
BaseCV = dyn_cast<Constant>(CV->user_back());
if (isa<ConstantAggregateZero>(CV)) {
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 610cbcb1a9b6b..7223dd845d18d 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -279,8 +279,7 @@ static UseListOrderMap predictUseListOrder(const Module *M) {
UseListOrderMap ULOM;
for (const auto &Pair : OM) {
const Value *V = Pair.first;
- if (!V->hasUseList() || V->use_empty() ||
- std::next(V->use_begin()) == V->use_end())
+ if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
continue;
std::vector<unsigned> Shuffle =
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 258681382f9e5..54e5e6d53e791 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -373,9 +373,7 @@ std::optional<BasicBlock::iterator> Instruction::getInsertionPointAfterDef() {
}
bool Instruction::isOnlyUserOfAnyOperand() {
- return any_of(operands(), [](const Value *V) {
- return V->hasUseList() && V->hasOneUser();
- });
+ return any_of(operands(), [](const Value *V) { return V->hasOneUser(); });
}
void Instruction::setHasNoUnsignedWrap(bool b) {
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 74a96051f33af..d6cb65d94a11d 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -148,14 +148,18 @@ void Value::destroyValueName() {
}
bool Value::hasNUses(unsigned N) const {
- if (!hasUseList())
- return Uses.Count == N;
+ if (!UseList)
+ return N == 0;
+
+ // TODO: Disallow for ConstantData and remove !UseList check?
return hasNItems(use_begin(), use_end(), N);
}
bool Value::hasNUsesOrMore(unsigned N) const {
- if (!hasUseList())
- return Uses.Count >= N;
+ // TODO: Disallow for ConstantData and remove !UseList check?
+ if (!UseList)
+ return N == 0;
+
return hasNItemsOrMore(use_begin(), use_end(), N);
}
@@ -259,9 +263,9 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
}
unsigned Value::getNumUses() const {
- if (!hasUseList())
- return Uses.Count;
-
+ // TODO: Disallow for ConstantData and remove !UseList check?
+ if (!UseList)
+ return 0;
return (unsigned)std::distance(use_begin(), use_end());
}
@@ -522,7 +526,7 @@ void Value::doRAUW(Value *New, ReplaceMetadataUses ReplaceMetaUses) {
ValueAsMetadata::handleRAUW(this, New);
while (!materialized_use_empty()) {
- Use &U = *Uses.List;
+ Use &U = *UseList;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
// constant because they are uniqued.
if (auto *C = dyn_cast<Constant>(U.getUser())) {
@@ -1102,12 +1106,12 @@ const Value *Value::DoPHITranslation(const BasicBlock *CurBB,
LLVMContext &Value::getContext() const { return VTy->getContext(); }
void Value::reverseUseList() {
- if (!Uses.List || !Uses.List->Next || !hasUseList())
+ if (!UseList || !UseList->Next)
// No need to reverse 0 or 1 uses.
return;
- Use *Head = Uses.List;
- Use *Current = Uses.List->Next;
+ Use *Head = UseList;
+ Use *Current = UseList->Next;
Head->Next = nullptr;
while (Current) {
Use *Next = Current->Next;
@@ -1116,8 +1120,8 @@ void Value::reverseUseList() {
Head = Current;
Current = Next;
}
- Uses.List = Head;
- Head->Prev = &Uses.List;
+ UseList = Head;
+ Head->Prev = &UseList;
}
bool Value::isSwiftError() const {
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index a46178abd9066..41cc212f00de6 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -21,6 +21,44 @@
namespace llvm {
namespace {
+// Check that use count checks treat ConstantData like they have no uses.
+TEST(ConstantsTest, UseCounts) {
+ LLVMContext Context;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Constant *Zero = ConstantInt::get(Int32Ty, 0);
+
+ EXPECT_TRUE(Zero->use_empty());
+ EXPECT_EQ(Zero->getNumUses(), 0u);
+ EXPECT_TRUE(Zero->hasNUses(0));
+ EXPECT_FALSE(Zero->hasOneUse());
+ EXPECT_FALSE(Zero->hasOneUser());
+ EXPECT_FALSE(Zero->hasNUses(1));
+ EXPECT_FALSE(Zero->hasNUsesOrMore(1));
+ EXPECT_FALSE(Zero->hasNUses(2));
+ EXPECT_FALSE(Zero->hasNUsesOrMore(2));
+
+ std::unique_ptr<Module> M(new Module("MyModule", Context));
+
+ // Introduce some uses
+ new GlobalVariable(*M, Int32Ty, /*isConstant=*/false,
+ GlobalValue::ExternalLinkage, /*Initializer=*/Zero,
+ "gv_user0");
+ new GlobalVariable(*M, Int32Ty, /*isConstant=*/false,
+ GlobalValue::ExternalLinkage, /*Initializer=*/Zero,
+ "gv_user1");
+
+ // Still looks like use_empty with uses.
+ EXPECT_TRUE(Zero->use_empty());
+ EXPECT_EQ(Zero->getNumUses(), 0u);
+ EXPECT_TRUE(Zero->hasNUses(0));
+ EXPECT_FALSE(Zero->hasOneUse());
+ EXPECT_FALSE(Zero->hasOneUser());
+ EXPECT_FALSE(Zero->hasNUses(1));
+ EXPECT_FALSE(Zero->hasNUsesOrMore(1));
+ EXPECT_FALSE(Zero->hasNUses(2));
+ EXPECT_FALSE(Zero->hasNUsesOrMore(2));
+}
+
TEST(ConstantsTest, Integer_i1) {
LLVMContext Context;
IntegerType *Int1 = IntegerType::get(Context, 1);
>From 87db0943e4d07640e4df3206045826c0688ed3b9 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 8 May 2025 14:08:21 +0800
Subject: [PATCH 099/115] MC: Support quoted symbol names
GAS has supported double-quoted symbol names since 2015:
https://sourceware.org/pipermail/binutils/2015-August/090003.html
We don't handle \\ or \", leading to a clang -c --save-temps vs clang -c
difference for the following C code:
```
int x asm("a\"\\b");
```
Fix #138390
MC/COFF/safeseh.s looks incorrect. \01 in `.safeseh "\01foo"` is not a
correct escape sequence. Change it to \\
Pull Request: https://github.com/llvm/llvm-project/pull/138817
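(Illustration only, not code from the patch: the unescaping that getOrCreateSymbol now performs can be sketched as a standalone helper. Only \\ and \" are collapsed; any other character after a backslash is kept verbatim.)
```
#include "llvm/ADT/StringRef.h"
#include <string>

// Sketch of the new behaviour: collapse \\ and \" escapes in a symbol name.
static std::string unescapeSymbolName(llvm::StringRef Name) {
  std::string Out;
  for (size_t I = 0, E = Name.size(); I != E; ++I) {
    char C = Name[I];
    if (C == '\\' && I + 1 != E &&
        (Name[I + 1] == '"' || Name[I + 1] == '\\'))
      C = Name[++I];
    Out += C;
  }
  return Out;
}
// unescapeSymbolName("a\\\"b\\\\") yields the raw 4-character name a"b\ ,
// matching how the quoted form "a\"b\\" in the new tests is resolved.
```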
---
llvm/lib/MC/MCContext.cpp | 21 +++++++++++++++++++++
llvm/lib/MC/MCSymbol.cpp | 2 ++
llvm/test/MC/AsmParser/quoted.s | 3 +++
llvm/test/MC/COFF/safeseh.s | 4 ++--
llvm/test/MC/ELF/symbol-names.s | 22 +++++++++++++++++++---
5 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index f70087e14f702..41caf20b331b2 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -212,6 +212,27 @@ MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) {
MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
StringRef NameRef = Name.toStringRef(NameSV);
+ if (NameRef.contains('\\')) {
+ NameSV = NameRef;
+ size_t S = 0;
+ // Support escaped \\ and \" as in GNU Assembler. GAS issues a warning for
+ // other characters following \\, which we do not implement due to code
+ // structure.
+ for (size_t I = 0, E = NameSV.size(); I < E; ++I) {
+ char C = NameSV[I];
+ if (C == '\\') {
+ switch (NameSV[I + 1]) {
+ case '"':
+ case '\\':
+ C = NameSV[++I];
+ break;
+ }
+ }
+ NameSV[S++] = C;
+ }
+ NameSV.resize(S);
+ NameRef = NameSV;
+ }
assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
diff --git a/llvm/lib/MC/MCSymbol.cpp b/llvm/lib/MC/MCSymbol.cpp
index 2a709f4aef80c..3ca85b76a35d9 100644
--- a/llvm/lib/MC/MCSymbol.cpp
+++ b/llvm/lib/MC/MCSymbol.cpp
@@ -74,6 +74,8 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << "\\n";
else if (C == '"')
OS << "\\\"";
+ else if (C == '\\')
+ OS << "\\\\";
else
OS << C;
}
diff --git a/llvm/test/MC/AsmParser/quoted.s b/llvm/test/MC/AsmParser/quoted.s
index 16b0997827603..710f2d0155f1c 100644
--- a/llvm/test/MC/AsmParser/quoted.s
+++ b/llvm/test/MC/AsmParser/quoted.s
@@ -9,6 +9,9 @@
"a b":
call "a b"
+# CHECK: "a b\\":
+"a b\\":
+
#--- err.s
"a\":
# ERR: 1:2: error: unterminated string constant
diff --git a/llvm/test/MC/COFF/safeseh.s b/llvm/test/MC/COFF/safeseh.s
index d21628daff5fa..afcdc771ff994 100644
--- a/llvm/test/MC/COFF/safeseh.s
+++ b/llvm/test/MC/COFF/safeseh.s
@@ -2,5 +2,5 @@
// check that we quote the output of .safeseh
-.safeseh "\01foo"
-// CHECK: .safeseh "\01foo"
+.safeseh "\\foo"
+// CHECK: .safeseh "\\foo"
diff --git a/llvm/test/MC/ELF/symbol-names.s b/llvm/test/MC/ELF/symbol-names.s
index f605c723d4d4d..427187c329acf 100644
--- a/llvm/test/MC/ELF/symbol-names.s
+++ b/llvm/test/MC/ELF/symbol-names.s
@@ -1,12 +1,28 @@
-// RUN: llvm-mc -triple i686-pc-linux -filetype=obj %s -o - | llvm-readobj --symbols - | FileCheck %s
+// RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump -tdr - | FileCheck %s
// MC allows ?'s in symbol names as an extension.
+// CHECK-LABEL:SYMBOL TABLE:
+// CHECK-NEXT: 0000000000000001 l F .text 0000000000000000 a"b\{{$}}
+// CHECK-NEXT: 0000000000000006 l .text 0000000000000000 a\{{$}}
+// CHECK-NEXT: 0000000000000000 g F .text 0000000000000000 foo?bar
+// CHECK-NEXT: 0000000000000000 *UND* 0000000000000000 a"b\q{{$}}
+// CHECK-EMPTY:
+
.text
.globl foo?bar
.type foo?bar, @function
foo?bar:
ret
-// CHECK: Symbol
-// CHECK: Name: foo?bar
+// CHECK-LABEL:<a"b\>:
+// CHECK-NEXT: callq {{.*}} <a"b\>
+// CHECK-LABEL:<a\>:
+// CHECK-NEXT: callq {{.*}}
+// CHECK-NEXT: R_X86_64_PLT32 a"b\q-0x4
+.type "a\"b\\", @function
+"a\"b\\":
+ call "a\"b\\"
+"a\\":
+/// GAS emits a warning for \q
+ call "a\"b\q"
>From df4eac2f8b6d32772953d3d8063568fe4c0314c1 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 7 May 2025 23:08:11 -0700
Subject: [PATCH 100/115] [lldb-dap] Temporarily disable the breakpoint tests
At least one of these tests is failing every run on GreenDragon:
https://ci.swift.org/view/all/job/llvm.org/view/LLDB/job/as-lldb-cmake/
---
.../test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py | 1 +
.../tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py | 1 +
.../tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py | 1 +
3 files changed, 3 insertions(+)
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
index 26df2573555df..aae1251b17c93 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
@@ -12,6 +12,7 @@
import os
+@skip("Temporarily disable the breakpoint tests")
class TestDAP_setBreakpoints(lldbdap_testcase.DAPTestCaseBase):
def setUp(self):
lldbdap_testcase.DAPTestCaseBase.setUp(self)
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
index 92ac66cd44c5d..4dc8c5b3c7ded 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
@@ -10,6 +10,7 @@
import lldbdap_testcase
+@skip("Temporarily disable the breakpoint tests")
class TestDAP_setExceptionBreakpoints(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_functionality(self):
diff --git a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
index 946595f639edc..baaca4d974d5d 100644
--- a/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
@@ -10,6 +10,7 @@
import lldbdap_testcase
+@skip("Temporarily disable the breakpoint tests")
class TestDAP_setFunctionBreakpoints(lldbdap_testcase.DAPTestCaseBase):
@skipIfWindows
def test_set_and_clear(self):
>From fc8484f0e383cc5cf31d67ad3e762705955ea1ea Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Thu, 8 May 2025 08:22:38 +0200
Subject: [PATCH 101/115] [mlir][Transforms][NFC] Rename
`MaterializationCallbackFn` (#138814)
There are two kinds of materialization callbacks: one for target
materializations and one for source materializations. The callback type
for target materializations is `TargetMaterializationCallbackFn`. This
commit renames the one for source materializations from
`MaterializationCallbackFn` to `SourceMaterializationCallbackFn`, for
consistency.
There used to be a single callback type for both kinds of
materializations, but the materialization function signatures have
changed over time.
Also clean up a few places in the documentation that still referred to
argument materializations.
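(For orientation only, not part of the change: a minimal sketch of registering a source materialization; the lambda matches the SourceMaterializationCallbackFn signature shown in the diff, and its body is a placeholder.)
```
#include "mlir/Transforms/DialectConversion.h"
using namespace mlir;

static void registerSourceMaterializationSketch(TypeConverter &converter) {
  converter.addSourceMaterialization(
      [](OpBuilder &builder, Type resultType, ValueRange inputs,
         Location loc) -> Value {
        // Placeholder: reuse the input if it already has the requested type;
        // returning a null Value means "no materialization produced".
        if (inputs.size() == 1 && inputs.front().getType() == resultType)
          return inputs.front();
        return Value();
      });
}
```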
---
mlir/docs/DialectConversion.md | 4 ++--
.../mlir/Transforms/DialectConversion.h | 21 +++++++++----------
.../Transforms/Utils/DialectConversion.cpp | 2 +-
3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index f67d1411b3065..cf577eca5b9a6 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -338,7 +338,7 @@ class TypeConverter {
typename T = typename llvm::function_traits<FnT>::template arg_t<1>>
void addSourceMaterialization(FnT &&callback) {
sourceMaterializations.emplace_back(
- wrapMaterialization<T>(std::forward<FnT>(callback)));
+ wrapSourceMaterialization<T>(std::forward<FnT>(callback)));
}
/// This method registers a materialization that will be called when
@@ -362,7 +362,7 @@ class TypeConverter {
typename T = typename llvm::function_traits<FnT>::template arg_t<1>>
void addTargetMaterialization(FnT &&callback) {
targetMaterializations.emplace_back(
- wrapMaterialization<T>(std::forward<FnT>(callback)));
+ wrapTargetMaterialization<T>(std::forward<FnT>(callback)));
}
};
```
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index b65b3ea971f91..e7d05c3ce1adf 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -186,7 +186,7 @@ class TypeConverter {
std::decay_t<FnT>>::template arg_t<1>>
void addSourceMaterialization(FnT &&callback) {
sourceMaterializations.emplace_back(
- wrapMaterialization<T>(std::forward<FnT>(callback)));
+ wrapSourceMaterialization<T>(std::forward<FnT>(callback)));
}
/// This method registers a materialization that will be called when
@@ -330,11 +330,10 @@ class TypeConverter {
using ConversionCallbackFn = std::function<std::optional<LogicalResult>(
Type, SmallVectorImpl<Type> &)>;
- /// The signature of the callback used to materialize a source/argument
- /// conversion.
+ /// The signature of the callback used to materialize a source conversion.
///
/// Arguments: builder, result type, inputs, location
- using MaterializationCallbackFn =
+ using SourceMaterializationCallbackFn =
std::function<Value(OpBuilder &, Type, ValueRange, Location)>;
/// The signature of the callback used to materialize a target conversion.
@@ -387,12 +386,12 @@ class TypeConverter {
cachedMultiConversions.clear();
}
- /// Generate a wrapper for the given argument/source materialization
- /// callback. The callback may take any subclass of `Type` and the
- /// wrapper will check for the target type to be of the expected class
- /// before calling the callback.
+ /// Generate a wrapper for the given source materialization callback. The
+ /// callback may take any subclass of `Type` and the wrapper will check for
+ /// the target type to be of the expected class before calling the callback.
template <typename T, typename FnT>
- MaterializationCallbackFn wrapMaterialization(FnT &&callback) const {
+ SourceMaterializationCallbackFn
+ wrapSourceMaterialization(FnT &&callback) const {
return [callback = std::forward<FnT>(callback)](
OpBuilder &builder, Type resultType, ValueRange inputs,
Location loc) -> Value {
@@ -491,7 +490,7 @@ class TypeConverter {
SmallVector<ConversionCallbackFn, 4> conversions;
/// The list of registered materialization functions.
- SmallVector<MaterializationCallbackFn, 2> sourceMaterializations;
+ SmallVector<SourceMaterializationCallbackFn, 2> sourceMaterializations;
SmallVector<TargetMaterializationCallbackFn, 2> targetMaterializations;
/// The list of registered type attribute conversion functions.
@@ -740,7 +739,7 @@ class ConversionPatternRewriter final : public PatternRewriter {
///
/// Optionally, a type converter can be provided to build materializations.
/// Note: If no type converter was provided or the type converter does not
- /// specify any suitable argument/target materialization rules, the dialect
+ /// specify any suitable source/target materialization rules, the dialect
/// conversion may fail to legalize unresolved materializations.
Block *
applySignatureConversion(Block *block,
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index d50c26a0fd92e..0d208ce0f2f25 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -2959,7 +2959,7 @@ TypeConverter::convertSignatureArgs(TypeRange types,
Value TypeConverter::materializeSourceConversion(OpBuilder &builder,
Location loc, Type resultType,
ValueRange inputs) const {
- for (const MaterializationCallbackFn &fn :
+ for (const SourceMaterializationCallbackFn &fn :
llvm::reverse(sourceMaterializations))
if (Value result = fn(builder, resultType, inputs, loc))
return result;
>From e7bf75043701119f8aeecff4c203cbcf2266fa62 Mon Sep 17 00:00:00 2001
From: fengfeng <153487255+fengfeng09 at users.noreply.github.com>
Date: Thu, 8 May 2025 14:29:13 +0800
Subject: [PATCH 102/115] [InstCombine] Pass disjoint in or combine (#138800)
Proof: https://alive2.llvm.org/ce/z/wtTm5V
https://alive2.llvm.org/ce/z/WC7Ai2
---------
Signed-off-by: feng.feng <feng.feng at iluvatar.com>
---
.../InstCombine/InstCombineAndOrXor.cpp | 13 ++++-
.../Transforms/InstCombine/or-or-combine.ll | 48 +++++++++++++++++++
2 files changed, 60 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/InstCombine/or-or-combine.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 100c144c177fe..508aef63a3128 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3894,12 +3894,23 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// be simplified by a later pass either, so we try swapping the inner/outer
// ORs in the hopes that we'll be able to simplify it this way.
// (X|C) | V --> (X|V) | C
+ // Pass the disjoint flag in the following two patterns:
+ // 1. or-disjoint (or-disjoint X, C), V -->
+ // or-disjoint (or-disjoint X, V), C
+ //
+ // 2. or-disjoint (or X, C), V -->
+ // or (or-disjoint X, V), C
ConstantInt *CI;
if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) &&
match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
+ bool IsDisjointOuter = cast<PossiblyDisjointInst>(I).isDisjoint();
+ bool IsDisjointInner = cast<PossiblyDisjointInst>(Op0)->isDisjoint();
Value *Inner = Builder.CreateOr(A, Op1);
+ cast<PossiblyDisjointInst>(Inner)->setIsDisjoint(IsDisjointOuter);
Inner->takeName(Op0);
- return BinaryOperator::CreateOr(Inner, CI);
+ return IsDisjointOuter && IsDisjointInner
+ ? BinaryOperator::CreateDisjointOr(Inner, CI)
+ : BinaryOperator::CreateOr(Inner, CI);
}
// Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
diff --git a/llvm/test/Transforms/InstCombine/or-or-combine.ll b/llvm/test/Transforms/InstCombine/or-or-combine.ll
new file mode 100644
index 0000000000000..9dc0dcc29b2f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-or-combine.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; (X | C) | Y --> (X | Y) | C
+
+define i32 @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[INNER:%.*]] = or disjoint i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OUTER:%.*]] = or disjoint i32 [[INNER]], 5
+; CHECK-NEXT: ret i32 [[OUTER]]
+;
+ %inner = or disjoint i32 %x, 5
+ %outer = or disjoint i32 %inner, %y
+ ret i32 %outer
+}
+
+define i32 @test2(i32 %x, i32 %y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[INNER:%.*]] = or disjoint i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OUTER:%.*]] = or i32 [[INNER]], 5
+; CHECK-NEXT: ret i32 [[OUTER]]
+;
+ %inner = or i32 %x, 5
+ %outer = or disjoint i32 %inner, %y
+ ret i32 %outer
+}
+
+define i32 @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[INNER:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OUTER:%.*]] = or i32 [[INNER]], 5
+; CHECK-NEXT: ret i32 [[OUTER]]
+;
+ %inner = or disjoint i32 %x, 5
+ %outer = or i32 %inner, %y
+ ret i32 %outer
+}
+
+define i32 @test4(i32 %x, i32 %y) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[INNER:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[OUTER:%.*]] = or i32 [[INNER]], 5
+; CHECK-NEXT: ret i32 [[OUTER]]
+;
+ %inner = or i32 %x, 5
+ %outer = or i32 %inner, %y
+ ret i32 %outer
+}
>From c099caa292cb93b441b1082a9ea59265721bce13 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme at arm.com>
Date: Thu, 8 May 2025 08:14:33 +0100
Subject: [PATCH 103/115] [MLIR][TOSA-Linalg] Fix rescale lowering for unsigned
input zp (#138780)
Lowering of tosa.rescale to Linalg unconditionally sign-extends the input
zero-point value, even when unsigned_input is true. This commit refactors
zero-point handling to share the same logic between the input and output
zero-points.
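(Sketch only, with an illustrative helper name rather than the actual API: the zero-point is now zero-extended for unsigned operands and sign-extended otherwise, as getZeroPoint(val, signExtend) does after this change.)
```
#include "llvm/ADT/APInt.h"
#include <cstdint>

static int64_t extendZeroPoint(const llvm::APInt &Zp, bool OperandIsUnsigned) {
  return OperandIsUnsigned ? static_cast<int64_t>(Zp.getZExtValue())
                           : Zp.getSExtValue();
}
// An i16 zero-point with bit pattern 0x8000 now reads as 32768 for an
// unsigned tensor (previously -32768), which is why the unsigned-int16
// check can compare against 32768 directly.
```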
---
.../Conversion/TosaToLinalg/TosaToLinalg.cpp | 45 ++++++------------
mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 46 ++++++++++---------
.../TosaToLinalg/tosa-to-linalg.mlir | 38 +++++++++++++--
mlir/test/Dialect/Tosa/invalid.mlir | 2 +-
mlir/test/Dialect/Tosa/ops.mlir | 11 +++++
5 files changed, 85 insertions(+), 57 deletions(-)
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index 95364c26d1a7d..0b69cd2814fb9 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -82,15 +82,6 @@ materializeBinaryNanCheckIfRequired(OpTy op, PatternRewriter &rewriter,
rhsOrResult);
}
-template <typename T>
-static arith::ConstantOp
-createConstOpFromZpVal(Operation *op, const int64_t &zp, Type requiredAttrType,
- OpBuilder &rewriter) {
- auto castedN = static_cast<T>(zp);
- return rewriter.create<arith::ConstantOp>(
- op->getLoc(), IntegerAttr::get(requiredAttrType, castedN));
-}
-
static Value createLinalgBodyCalculationForElementwiseOp(
Operation *op, ValueRange args, ArrayRef<Type> resultTypes,
ConversionPatternRewriter &rewriter) {
@@ -1467,11 +1458,6 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
Value value = blockArgs[0];
Type valueTy = value.getType();
- // For now we do all of our math in 64-bit. This is not optimal but
- // should be correct for now, consider computing correct bit depth
- // later.
- int32_t inBitwidth = valueTy.getIntOrFloatBitWidth() > 32 ? 48 : 32;
-
FailureOr<int64_t> maybeIZp = op.getInputZeroPoint();
if (failed(maybeIZp)) {
(void)rewriter.notifyMatchFailure(
@@ -1479,9 +1465,12 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
return;
}
- auto inputZp = createConstOpFromZpVal<int32_t>(
- op, *maybeIZp, nestedBuilder.getIntegerType(inBitwidth),
- nestedBuilder);
+ const int32_t inBitwidth = valueTy.getIntOrFloatBitWidth();
+ // Extend zeropoint for sub-32bits widths.
+ const int32_t inAttrBitwidth = inBitwidth > 32 ? inBitwidth : 32;
+ auto inputZp = nestedBuilder.create<arith::ConstantOp>(
+ loc, IntegerAttr::get(rewriter.getIntegerType(inAttrBitwidth),
+ *maybeIZp));
FailureOr<int64_t> maybeOZp = op.getOutputZeroPoint();
if (failed(maybeOZp)) {
@@ -1490,16 +1479,14 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
return;
};
- // pre-process OutputZP as it can be unsigned
- auto outBitwidth = outputTy.getElementType().getIntOrFloatBitWidth();
- APInt OZp(outBitwidth, !op.getOutputUnsigned());
- OZp = static_cast<int64_t>(*maybeOZp);
- *maybeOZp = op.getOutputUnsigned()
- ? static_cast<int64_t>(OZp.getZExtValue())
- : OZp.getSExtValue();
-
- auto outputZp = createConstOpFromZpVal<int32_t>(
- op, *maybeOZp, nestedBuilder.getI32Type(), nestedBuilder);
+ IntegerType outIntType =
+ cast<IntegerType>(blockArgs.back().getType());
+ unsigned outBitWidth = outIntType.getWidth();
+ const int32_t outAttrBitwidth = 32;
+ assert(outBitWidth <= 32 && "Unexpected output zeropoint bitwidth");
+ auto outputZp = nestedBuilder.create<arith::ConstantOp>(
+ loc, IntegerAttr::get(rewriter.getIntegerType(outAttrBitwidth),
+ *maybeOZp));
Value multiplier = multiplierConstant ? multiplierConstant
: blockArgs[multiplierArg];
@@ -1527,10 +1514,6 @@ class RescaleConverter : public OpRewritePattern<tosa::RescaleOp> {
nestedBuilder.create<arith::AddIOp>(nestedLoc, value, outputZp);
// Saturate to the output size.
- IntegerType outIntType =
- cast<IntegerType>(blockArgs.back().getType());
- unsigned outBitWidth = outIntType.getWidth();
-
int32_t intMin = APInt::getSignedMinValue(outBitWidth).getSExtValue();
int32_t intMax = APInt::getSignedMaxValue(outBitWidth).getSExtValue();
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index de06b621cbe3d..371c6dc27b428 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -2118,7 +2118,7 @@ llvm::LogicalResult tosa::ReshapeOp::verify() {
// return failure if val is not a constant
// set zp to -1 if val is non-zero float or val is not integer nor float
// otherwise set zp to val's constant value
-static FailureOr<int64_t> getZeroPoint(Value val) {
+static FailureOr<int64_t> getZeroPoint(Value val, bool signExtend) {
ElementsAttr zpAttr;
if (!matchPattern(val, m_Constant(&zpAttr))) {
return failure();
@@ -2135,7 +2135,10 @@ static FailureOr<int64_t> getZeroPoint(Value val) {
}
if (llvm::isa<IntegerType>(zpElemType)) {
- return zpAttr.getValues<APInt>()[0].getSExtValue();
+ if (signExtend)
+ return zpAttr.getValues<APInt>()[0].getSExtValue();
+ else
+ return zpAttr.getValues<APInt>()[0].getZExtValue();
}
// return non-zero value to trigger error check
@@ -2175,8 +2178,7 @@ static LogicalResult verifyZeroPoint(tosa::RescaleOp op, Value zpVal,
return op.emitOpError()
<< "expect " << tensorName << "_zp of 0, got " << zp;
}
- if (zpElemType.isInteger(16) && tensorUnsigned &&
- zp != static_cast<int16_t>(32768)) {
+ if (zpElemType.isInteger(16) && tensorUnsigned && zp != 32768) {
return op.emitOpError() << "expect " << tensorName
<< "_zp of 0 or 32768 for unsigned int16 "
<< tensorName << ", got " << zp;
@@ -2186,30 +2188,30 @@ static LogicalResult verifyZeroPoint(tosa::RescaleOp op, Value zpVal,
return success();
}
-#define ZERO_POINT_HELPER(OP, OPERAND_NAME) \
+#define ZERO_POINT_HELPER(OP, OPERAND_NAME, SIGN_EXTEND) \
FailureOr<int64_t> tosa::OP::get##OPERAND_NAME##ZeroPoint() { \
- return getZeroPoint(get##OPERAND_NAME##Zp()); \
+ return getZeroPoint(get##OPERAND_NAME##Zp(), SIGN_EXTEND); \
} \
LogicalResult tosa::OP::verify##OPERAND_NAME##ZeroPoint(int64_t zp) { \
return verifyZeroPoint(*this, get##OPERAND_NAME##Zp(), zp, #OPERAND_NAME); \
}
-ZERO_POINT_HELPER(Conv2DOp, Input)
-ZERO_POINT_HELPER(Conv2DOp, Weight)
-ZERO_POINT_HELPER(Conv3DOp, Input)
-ZERO_POINT_HELPER(Conv3DOp, Weight)
-ZERO_POINT_HELPER(DepthwiseConv2DOp, Input)
-ZERO_POINT_HELPER(DepthwiseConv2DOp, Weight)
-ZERO_POINT_HELPER(TransposeConv2DOp, Input)
-ZERO_POINT_HELPER(TransposeConv2DOp, Weight)
-ZERO_POINT_HELPER(AvgPool2dOp, Input)
-ZERO_POINT_HELPER(AvgPool2dOp, Output)
-ZERO_POINT_HELPER(MatMulOp, A)
-ZERO_POINT_HELPER(MatMulOp, B)
-ZERO_POINT_HELPER(NegateOp, Input1)
-ZERO_POINT_HELPER(NegateOp, Output)
-ZERO_POINT_HELPER(RescaleOp, Input)
-ZERO_POINT_HELPER(RescaleOp, Output)
+ZERO_POINT_HELPER(Conv2DOp, Input, true)
+ZERO_POINT_HELPER(Conv2DOp, Weight, true)
+ZERO_POINT_HELPER(Conv3DOp, Input, true)
+ZERO_POINT_HELPER(Conv3DOp, Weight, true)
+ZERO_POINT_HELPER(DepthwiseConv2DOp, Input, true)
+ZERO_POINT_HELPER(DepthwiseConv2DOp, Weight, true)
+ZERO_POINT_HELPER(TransposeConv2DOp, Input, true)
+ZERO_POINT_HELPER(TransposeConv2DOp, Weight, true)
+ZERO_POINT_HELPER(AvgPool2dOp, Input, true)
+ZERO_POINT_HELPER(AvgPool2dOp, Output, true)
+ZERO_POINT_HELPER(MatMulOp, A, true)
+ZERO_POINT_HELPER(MatMulOp, B, true)
+ZERO_POINT_HELPER(NegateOp, Input1, true)
+ZERO_POINT_HELPER(NegateOp, Output, true)
+ZERO_POINT_HELPER(RescaleOp, Input, !getInputUnsigned())
+ZERO_POINT_HELPER(RescaleOp, Output, !getOutputUnsigned())
#undef ZERO_POINT_HELPER
LogicalResult tosa::TransposeOp::inferReturnTypeComponents(
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index 7083d19f4372a..185f1973ecdc6 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1241,10 +1241,10 @@ func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
// CHECK: [[INIT:%.+]] = tensor.empty()
// CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>)
// CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8):
- // CHECK: [[C17:%.+]] = arith.constant 17
+ // CHECK: [[C128:%.+]] = arith.constant 128
// CHECK: [[C22:%.+]] = arith.constant 22
// CHECK-DAG: [[IN32:%.+]] = arith.extui [[IN]]
- // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C17]]
+ // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN32]], [[C128]]
// CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C0]], [[C1]] {rounding_mode = "SINGLE_ROUND"}
// CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C22]]
// CHECK-DAG: [[CMIN:%.+]] = arith.constant -128
@@ -1255,13 +1255,45 @@ func.func @rescale_i8_unsigned_input(%arg0 : tensor<2xi8>) -> () {
// CHECK: linalg.yield [[TRUNC]]
%multiplier = "tosa.const"() {values = dense<19689> : tensor<1xi16> } : () -> tensor<1xi16>
%shift = "tosa.const"() {values = dense<15> : tensor<1xi8> } : () -> tensor<1xi8>
- %input_zp = "tosa.const"() {values = dense<17> : tensor<1xi8>} : () -> tensor<1xi8>
+ %input_zp = "tosa.const"() {values = dense<-128> : tensor<1xi8>} : () -> tensor<1xi8>
%output_zp = "tosa.const"() {values = dense<22> : tensor<1xi8>} : () -> tensor<1xi8>
%0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = false, rounding_mode = "SINGLE_ROUND", per_channel = false, input_unsigned = true, output_unsigned = false} : (tensor<2xi8>, tensor<1xi16>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>
return
}
+// -----
+// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: @rescale_i48_unsigned_output
+// CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]]:
+func.func @rescale_i48_unsigned_output(%arg0 : tensor<2xi48>) -> () {
+ // CHECK: [[C19689:%.+]] = arith.constant 19689
+ // CHECK: [[C15:%.+]] = arith.constant 15
+ // CHECK: [[INIT:%.+]] = tensor.empty()
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi48>) outs([[INIT]] : tensor<2xi8>)
+ // CHECK: ^bb0([[IN:%.+]]: i48, [[UNUSED:%.+]]: i8):
+ // CHECK: [[C0:%.+]] = arith.constant 0
+ // CHECK: [[C234:%.+]] = arith.constant 234
+ // CHECK-DAG: [[IN_ZEROED:%.+]] = arith.subi [[IN]], [[C0]]
+ // CHECK-DAG: [[SCALED:%.+]] = tosa.apply_scale [[IN_ZEROED]], [[C19689]], [[C15]] {rounding_mode = "SINGLE_ROUND"}
+ // CHECK-DAG: [[SCALED_ZEROED:%.+]] = arith.addi [[SCALED]], [[C234]]
+ // CHECK-DAG: [[CMIN:%.+]] = arith.constant 0
+ // CHECK-DAG: [[CMAX:%.+]] = arith.constant 255
+ // CHECK-DAG: [[LOWER:%.+]] = arith.maxsi [[CMIN]], [[SCALED_ZEROED]]
+ // CHECK-DAG: [[BOUNDED:%.+]] = arith.minsi [[CMAX]], [[LOWER]]
+ // CHECK-DAG: [[TRUNC:%.+]] = arith.trunci [[BOUNDED]]
+ // CHECK: linalg.yield [[TRUNC]]
+ %multiplier = "tosa.const"() {values = dense<19689> : tensor<1xi16> } : () -> tensor<1xi16>
+ %shift = "tosa.const"() {values = dense<15> : tensor<1xi8> } : () -> tensor<1xi8>
+ %input_zp = "tosa.const"() {values = dense<0> : tensor<1xi48>} : () -> tensor<1xi48>
+ %output_zp = "tosa.const"() {values = dense<-22> : tensor<1xi8>} : () -> tensor<1xi8>
+ %1 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = false, rounding_mode = "SINGLE_ROUND", per_channel = false, input_unsigned = false, output_unsigned = true} : (tensor<2xi48>, tensor<1xi16>, tensor<1xi8>, tensor<1xi48>, tensor<1xi8>) -> tensor<2xi8>
+
+ // CHECK: return
+ return
+}
+
// -----
// CHECK: #[[$MAP0:.*]] = affine_map<(d0) -> (d0)>
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index 9ccb310c4491d..56d76585be71b 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -1517,7 +1517,7 @@ func.func @test_rescale_invalid_output_zp_u16(%arg0: tensor<13x21x3xi16>) -> ten
%shift = "tosa.const"() {values = dense<30> : tensor<1xi8> } : () -> tensor<1xi8>
%input_zp = "tosa.const"() {values = dense<0> : tensor<1xi16>} : () -> tensor<1xi16>
%output_zp = "tosa.const"() {values = dense<-1> : tensor<1xi16>} : () -> tensor<1xi16>
- // expected-error at +1 {{'tosa.rescale' op expect output_zp of 0 or 32768 for unsigned int16 output, got -1}}
+ // expected-error at +1 {{'tosa.rescale' op expect output_zp of 0 or 32768 for unsigned int16 output, got 65535}}
%0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {rounding_mode = "SINGLE_ROUND", per_channel = false, scale32 = true, input_unsigned = false, output_unsigned = true} : (tensor<13x21x3xi16>, tensor<1xi32>, tensor<1xi8>, tensor<1xi16>, tensor<1xi16>) -> tensor<13x21x3xi16>
return %0 : tensor<13x21x3xi16>
}
diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir
index f8273190bde40..e327ed900f45f 100644
--- a/mlir/test/Dialect/Tosa/ops.mlir
+++ b/mlir/test/Dialect/Tosa/ops.mlir
@@ -753,6 +753,17 @@ func.func @test_rescale(%arg0: tensor<13x21x3x!quant.uniform<u8:f32, 0.015655439
return %0 : tensor<13x21x3x!quant.uniform<i8:f32, 0.015655439347028732:-1>>
}
+// -----
+// CHECK-LABEL: rescale_i16_zp32768
+func.func @test_rescale_i16_zp32768(%arg0 : tensor<2xi8>) -> tensor<2xi16> {
+ %multiplier = "tosa.const"() {values = dense<19689> : tensor<1xi16>} : () -> tensor<1xi16>
+ %shift = "tosa.const"() {values = dense<15> : tensor<1xi8>} : () -> tensor<1xi8>
+ %input_zp = "tosa.const"() {values = dense<17> : tensor<1xi8>} : () -> tensor<1xi8>
+ %output_zp = "tosa.const"() {values = dense<32768> : tensor<1xi16>} : () -> tensor<1xi16>
+ %0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = false, rounding_mode = "SINGLE_ROUND", per_channel = false, input_unsigned = false, output_unsigned = true} : (tensor<2xi8>, tensor<1xi16>, tensor<1xi8>, tensor<1xi8>, tensor<1xi16>) -> tensor<2xi16>
+ return %0 : tensor<2xi16>
+}
+
// -----
// CHECK-LABEL: const
func.func @test_const(%arg0 : index) -> tensor<4xi32> {
>From 09984be7d9c8b4ccb8fc27b0533c049f384a898f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 8 May 2025 00:22:10 -0700
Subject: [PATCH 104/115] X86,test: Improve asm modifier tests
---
llvm/test/CodeGen/X86/2010-07-06-asm-RIP.ll | 21 -------
llvm/test/CodeGen/X86/asm-modifier-macho.ll | 67 +++++++++++++++++----
llvm/test/CodeGen/X86/asm-modifier.ll | 21 +++++++
llvm/test/CodeGen/X86/pr19752.ll | 15 -----
4 files changed, 75 insertions(+), 49 deletions(-)
delete mode 100644 llvm/test/CodeGen/X86/2010-07-06-asm-RIP.ll
delete mode 100644 llvm/test/CodeGen/X86/pr19752.ll
diff --git a/llvm/test/CodeGen/X86/2010-07-06-asm-RIP.ll b/llvm/test/CodeGen/X86/2010-07-06-asm-RIP.ll
deleted file mode 100644
index 8b82d0ec78fa9..0000000000000
--- a/llvm/test/CodeGen/X86/2010-07-06-asm-RIP.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -no-integrated-as | FileCheck %s
-; PR 4752
-
-@n = global i32 0 ; <ptr> [#uses=2]
-
-define void @f(ptr) nounwind ssp {
- ret void
-}
-
-define void @g() nounwind ssp {
-entry:
-; CHECK: _g:
-; CHECK: push $_f$_f
-; CHECK: call _f(%rip)
- call void asm sideeffect "push\09$1$1\0A\09call\09${1:a}\0A\09pop\09%edx", "imr,i,~{dirflag},~{fpsr},~{flags},~{memory},~{cc},~{edi},~{esi},~{edx},~{ecx},~{ebx},~{eax}"(ptr @n, ptr @f) nounwind
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
diff --git a/llvm/test/CodeGen/X86/asm-modifier-macho.ll b/llvm/test/CodeGen/X86/asm-modifier-macho.ll
index 4f9124d8e225f..b8da424bbacca 100644
--- a/llvm/test/CodeGen/X86/asm-modifier-macho.ll
+++ b/llvm/test/CodeGen/X86/asm-modifier-macho.ll
@@ -1,21 +1,39 @@
-; RUN: llc < %s | FileCheck %s
-; ModuleID = 'asm.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=i386-apple-darwin9.6 < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK,X64
define i32 @test1() nounwind {
-entry:
; CHECK-LABEL: test1:
-; CHECK: movw %gs:6, %ax
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: ## InlineAsm Start
+; CHECK-NEXT: movw %gs:6, %ax
+; CHECK-NEXT: ## InlineAsm End
+; CHECK-NEXT: movzwl %ax, %eax
+; CHECK-NEXT: ret{{[l|q]}}
+entry:
%asmtmp.i = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 6) nounwind ; <i16> [#uses=1]
%0 = zext i16 %asmtmp.i to i32 ; <i32> [#uses=1]
ret i32 %0
}
define zeroext i16 @test2(i32 %address) nounwind {
+; X86-LABEL: test2:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: ## InlineAsm Start
+; X86-NEXT: movw %gs:(%eax), %ax
+; X86-NEXT: ## InlineAsm End
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: ## InlineAsm Start
+; X64-NEXT: movw %gs:(%edi), %ax
+; X64-NEXT: ## InlineAsm End
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: retq
entry:
-; CHECK-LABEL: test2:
-; CHECK: movw %gs:(%eax), %ax
%asmtmp = tail call i16 asm "movw\09%gs:${1:a}, ${0:w}", "=r,ir,~{dirflag},~{fpsr},~{flags}"(i32 %address) nounwind ; <i16> [#uses=1]
ret i16 %asmtmp
}
@@ -24,18 +42,41 @@ entry:
@y = common global i32 0 ; <ptr> [#uses=3]
define void @test3() nounwind {
+; X86-LABEL: test3:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: ## InlineAsm Start
+; X86-NEXT: movl _n, %eax
+; X86-NEXT: ## InlineAsm End
+; X86-NEXT: retl
+;
+; X64-LABEL: test3:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: ## InlineAsm Start
+; X64-NEXT: movl _n(%rip), %eax
+; X64-NEXT: ## InlineAsm End
+; X64-NEXT: retq
entry:
-; CHECK-LABEL: test3:
-; CHECK: movl _n, %eax
call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(ptr @n) nounwind
ret void
}
define void @test4() nounwind {
+; X86-LABEL: test4:
+; X86: ## %bb.0: ## %entry
+; X86-NEXT: movl L_y$non_lazy_ptr, %ecx
+; X86-NEXT: ## InlineAsm Start
+; X86-NEXT: movl (%ecx), %eax
+; X86-NEXT: ## InlineAsm End
+; X86-NEXT: retl
+;
+; X64-LABEL: test4:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: movq _y at GOTPCREL(%rip), %rcx
+; X64-NEXT: ## InlineAsm Start
+; X64-NEXT: movl (%rcx), %eax
+; X64-NEXT: ## InlineAsm End
+; X64-NEXT: retq
entry:
-; CHECK-LABEL: test4:
-; CHECK: movl L_y$non_lazy_ptr, %ecx
-; CHECK: movl (%ecx), %eax
call void asm sideeffect "movl ${0:a}, %eax", "ir,~{dirflag},~{fpsr},~{flags},~{eax}"(ptr @y) nounwind
ret void
}
diff --git a/llvm/test/CodeGen/X86/asm-modifier.ll b/llvm/test/CodeGen/X86/asm-modifier.ll
index 9a69402d22168..6bc800da04b48 100644
--- a/llvm/test/CodeGen/X86/asm-modifier.ll
+++ b/llvm/test/CodeGen/X86/asm-modifier.ll
@@ -4,6 +4,17 @@
@var = internal global i32 0, align 4
+define dso_local void @test_a() nounwind {
+; CHECK-LABEL: test_a:
+; CHECK: # %bb.0:
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #TEST var#
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ret{{[l|q]}}
+ tail call void asm sideeffect "#TEST ${0:c}#", "i"(ptr nonnull @var)
+ ret void
+}
+
define dso_local void @test_c() nounwind {
; CHECK-LABEL: test_c:
; CHECK: # %bb.0:
@@ -19,6 +30,16 @@ define dso_local void @test_c() nounwind {
ret void
}
+define dso_local void @test_k() nounwind {
+; CHECK-LABEL: test_k:
+; CHECK: # %bb.0:
+; CHECK-NEXT: #APP
+; CHECK-NEXT: movl %fs:0, %eax
+; CHECK-NEXT: #NO_APP
+ %tmp = tail call i64 asm "movl %fs:${1:a}, ${0:k}", "=q,irm,~{dirflag},~{fpsr},~{flags}"(i64 0)
+ unreachable
+}
+
define dso_local void @test_n() nounwind {
; CHECK-LABEL: test_n:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/pr19752.ll b/llvm/test/CodeGen/X86/pr19752.ll
deleted file mode 100644
index 0dd6895ecb4be..0000000000000
--- a/llvm/test/CodeGen/X86/pr19752.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
-target triple = "i386---elf"
-
-define void @test() {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #APP
-; CHECK-NEXT: movl %fs:0, %eax
-; CHECK-NEXT: #NO_APP
- %tmp = tail call i64 asm "movl %fs:${1:a}, ${0:k}", "=q,irm,~{dirflag},~{fpsr},~{flags}"(i64 0)
- unreachable
-}
>From e18f248956b317f06f7822920c72d7a2eebcd267 Mon Sep 17 00:00:00 2001
From: Dmitry Vasilyev <dvassiliev at accesssoftek.com>
Date: Thu, 8 May 2025 11:22:37 +0400
Subject: [PATCH 105/115] [lldb][test] Disable flaky
test_qThreadInfo_matches_qC_attach test on AArch64 Linux (#138940)
See #138085 for details.
https://lab.llvm.org/buildbot/#/builders/59/builds/16937
https://lab.llvm.org/buildbot/#/builders/59/builds/17224
---
lldb/test/API/tools/lldb-server/TestLldbGdbServer.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
index 2c328125e3058..67690a275f0da 100644
--- a/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
+++ b/lldb/test/API/tools/lldb-server/TestLldbGdbServer.py
@@ -202,6 +202,13 @@ def test_qThreadInfo_matches_qC_launch(self):
self.set_inferior_startup_launch()
self.qThreadInfo_matches_qC()
+ # This test is flaky on AArch64 Linux. Sometimes it causes an unhandled Error:
+ # Operation not permitted in lldb_private::process_linux::NativeProcessLinux::Attach(int).
+ @skipIf(
+ oslist=["linux"],
+ archs=["aarch64"],
+ bugnumber="github.com/llvm/llvm-project/issues/138085",
+ )
@expectedFailureAll(oslist=["windows"]) # expect one more thread stopped
def test_qThreadInfo_matches_qC_attach(self):
self.build()
>From 19174126cfe9f7e392104bd0bc56ca8ffb674115 Mon Sep 17 00:00:00 2001
From: Gaëtan Bossu <gaetan.bossu at arm.com>
Date: Thu, 8 May 2025 08:34:53 +0100
Subject: [PATCH 106/115] [SLP] Simplify buildTree() legality checks (NFC)
(#138833)
This NFC simplifies the interfaces used in `buildTree()` so it is easier to
see where legality decisions are made: there is now a single point of
definition for those decisions. Previously, multiple wide-scoped variables
were passed by reference.
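To make the shape of the refactor concrete, here is a minimal standalone
sketch of the pattern (simplified stand-in types and names, not the actual
SLPVectorizer interfaces; the two bail-out cases mirror the catchswitch and
no-common-opcode checks in the diff below): the legality query returns one
small value object instead of writing its results through several reference
parameters.

#include <iostream>

// Simplified stand-in for the real result class: every outcome of the
// legality analysis is described by one value object rather than by
// several caller-provided flags.
struct Legality {
  bool IsLegal;
  bool TryToFindDuplicates;
  bool TrySplitVectorize;
};

// Single point of definition: each bail-out constructs the complete result.
static Legality checkLegality(bool InCatchSwitchBlock, bool OpcodesMatch) {
  if (InCatchSwitchBlock)
    return {/*IsLegal=*/false, /*TryToFindDuplicates=*/false,
            /*TrySplitVectorize=*/false};
  if (!OpcodesMatch)
    return {/*IsLegal=*/false, /*TryToFindDuplicates=*/true,
            /*TrySplitVectorize=*/true};
  return {/*IsLegal=*/true, /*TryToFindDuplicates=*/true,
          /*TrySplitVectorize=*/false};
}

int main() {
  Legality L = checkLegality(/*InCatchSwitchBlock=*/false, /*OpcodesMatch=*/false);
  if (!L.IsLegal && L.TrySplitVectorize)
    std::cout << "try a split node before falling back to a gather\n";
  return 0;
}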
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 116 ++++++++++--------
1 file changed, 67 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6ae26f2f0e1a..7fbbb2681b9ed 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4063,6 +4063,15 @@ class BoUpSLP {
}
#endif
+ /// Create a new gather TreeEntry
+ TreeEntry *newGatherTreeEntry(ArrayRef<Value *> VL,
+ const InstructionsState &S,
+ const EdgeInfo &UserTreeIdx,
+ ArrayRef<int> ReuseShuffleIndices = {}) {
+ auto Invalid = ScheduleBundle::invalid();
+ return newTreeEntry(VL, Invalid, S, UserTreeIdx, ReuseShuffleIndices);
+ }
+
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, ScheduleBundle &Bundle,
const InstructionsState &S,
@@ -4251,13 +4260,34 @@ class BoUpSLP {
bool areAltOperandsProfitable(const InstructionsState &S,
ArrayRef<Value *> VL) const;
+ /// Contains all the outputs of legality analysis for a list of values to
+ /// vectorize.
+ class ScalarsVectorizationLegality {
+ InstructionsState S;
+ bool IsLegal;
+ bool TryToFindDuplicates;
+ bool TrySplitVectorize;
+
+ public:
+ ScalarsVectorizationLegality(InstructionsState S, bool IsLegal,
+ bool TryToFindDuplicates = true,
+ bool TrySplitVectorize = false)
+ : S(S), IsLegal(IsLegal), TryToFindDuplicates(TryToFindDuplicates),
+ TrySplitVectorize(TrySplitVectorize) {
+ assert((!IsLegal || (S.valid() && TryToFindDuplicates)) &&
+ "Inconsistent state");
+ }
+ const InstructionsState &getInstructionsState() const { return S; };
+ bool isLegal() const { return IsLegal; }
+ bool tryToFindDuplicates() const { return TryToFindDuplicates; }
+ bool trySplitVectorize() const { return TrySplitVectorize; }
+ };
+
/// Checks if the specified list of the instructions/values can be vectorized
/// in general.
- bool isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx,
- InstructionsState &S,
- bool &TryToFindDuplicates,
- bool &TrySplitVectorize) const;
+ ScalarsVectorizationLegality
+ getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
+ const EdgeInfo &UserTreeIdx) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -9734,16 +9764,12 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
return true;
}
-bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx,
- InstructionsState &S,
- bool &TryToFindDuplicates,
- bool &TrySplitVectorize) const {
+BoUpSLP::ScalarsVectorizationLegality
+BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
+ const EdgeInfo &UserTreeIdx) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
- S = getSameOpcode(VL, *TLI);
- TryToFindDuplicates = true;
- TrySplitVectorize = false;
+ InstructionsState S = getSameOpcode(VL, *TLI);
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
@@ -9751,8 +9777,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
if (S && isa<CatchSwitchInst>(S.getMainOp()->getParent()->getTerminator())) {
LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n");
// Do not try to pack to avoid extra instructions here.
- TryToFindDuplicates = false;
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/false);
}
// Check if this is a duplicate of another entry.
@@ -9762,14 +9788,14 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
if (E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.getMainOp()
<< ".\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
SmallPtrSet<Value *, 8> Values(llvm::from_range, E->Scalars);
if (all_of(VL, [&](Value *V) {
return isa<PoisonValue>(V) || Values.contains(V);
})) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
}
}
@@ -9786,7 +9812,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
cast<Instruction>(I)->getOpcode() == S.getOpcode();
})))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
// Don't handle scalable vectors
@@ -9794,15 +9820,15 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.getMainOp())->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
// Don't handle vectors.
if (!SLPReVec && getValueType(VL.front())->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
// Do not try to pack to avoid extra instructions here.
- TryToFindDuplicates = false;
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/false);
}
// If all of the operands are identical or constant we have a simple solution.
@@ -9892,11 +9918,12 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
if (!S) {
LLVM_DEBUG(dbgs() << "SLP: Try split and if failed, gathering due to "
"C,S,B,O, small shuffle. \n");
- TrySplitVectorize = true;
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/true,
+ /*TrySplitVectorize=*/true);
}
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O, small shuffle. \n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
// Don't vectorize ephemeral values.
@@ -9906,8 +9933,8 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is ephemeral.\n");
// Do not try to pack to avoid extra instructions here.
- TryToFindDuplicates = false;
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false,
+ /*TryToFindDuplicates=*/false);
}
}
}
@@ -9956,7 +9983,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
if (PreferScalarize) {
LLVM_DEBUG(dbgs() << "SLP: The instructions are in tree and alternate "
"node is not profitable.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
}
@@ -9965,7 +9992,7 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
for (Value *V : VL) {
if (UserIgnoreList->contains(V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
}
}
@@ -9995,9 +10022,9 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
// Do not vectorize EH and non-returning blocks, not profitable in most
// cases.
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- return false;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/false);
}
- return true;
+ return ScalarsVectorizationLegality(S, /*IsLegal=*/true);
}
void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
@@ -10008,7 +10035,6 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
SmallVector<int> ReuseShuffleIndices;
SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
- InstructionsState S = InstructionsState::invalid();
// Tries to build split node.
auto TrySplitNode = [&](const InstructionsState &LocalState) {
SmallVector<Value *> Op1, Op2;
@@ -10042,22 +10068,20 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return true;
};
- bool TryToPackDuplicates;
- bool TrySplitVectorize;
- if (!isLegalToVectorizeScalars(VL, Depth, UserTreeIdx, S, TryToPackDuplicates,
- TrySplitVectorize)) {
- if (TrySplitVectorize) {
+ ScalarsVectorizationLegality Legality =
+ getScalarsVectorizationLegality(VL, Depth, UserTreeIdx);
+ const InstructionsState &S = Legality.getInstructionsState();
+ if (!Legality.isLegal()) {
+ if (Legality.trySplitVectorize()) {
auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
// Last chance to try to vectorize alternate node.
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
return;
}
- if (TryToPackDuplicates)
+ if (Legality.tryToFindDuplicates())
tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
- auto Invalid = ScheduleBundle::invalid();
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
}
@@ -10068,9 +10092,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
// Check that every instruction appears once in this bundle.
if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
/*TryPad=*/true)) {
- auto Invalid = ScheduleBundle::invalid();
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
}
@@ -10083,9 +10105,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
TreeEntry::EntryState State = getScalarsVectorizationState(
S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
if (State == TreeEntry::NeedToGather) {
- auto Invalid = ScheduleBundle::invalid();
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
return;
}
@@ -10109,9 +10129,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
// Last chance to try to vectorize alternate node.
if (S.isAltShuffle() && ReuseShuffleIndices.empty() && TrySplitNode(S))
return;
- auto Invalid = ScheduleBundle::invalid();
- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndices);
+ newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices);
NonScheduledFirst.insert(VL.front());
if (S.getOpcode() == Instruction::Load &&
BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
>From d307c774d0dd5062199f6d97b2184cb76e157542 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 8 May 2025 00:35:59 -0700
Subject: [PATCH 107/115] X86: Add asm modifier tests for 64-bit PIC
---
llvm/test/CodeGen/X86/asm-modifier-pic.ll | 15 +++++++++++++++
llvm/test/CodeGen/X86/asm-modifier.ll | 14 +++++---------
2 files changed, 20 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/asm-modifier-pic.ll
diff --git a/llvm/test/CodeGen/X86/asm-modifier-pic.ll b/llvm/test/CodeGen/X86/asm-modifier-pic.ll
new file mode 100644
index 0000000000000..a8d904fd96d9d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/asm-modifier-pic.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s
+
+ at var = internal global i32 0, align 4
+
+define dso_local void @test_a() nounwind {
+; CHECK-LABEL: test_a:
+; CHECK: # %bb.0:
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #TEST 42 var(%rip)#
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: retq
+ tail call void asm sideeffect "#TEST ${0:a} ${1:a}#", "i,i"(i32 42, ptr @var)
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/asm-modifier.ll b/llvm/test/CodeGen/X86/asm-modifier.ll
index 6bc800da04b48..e49e7d6b01964 100644
--- a/llvm/test/CodeGen/X86/asm-modifier.ll
+++ b/llvm/test/CodeGen/X86/asm-modifier.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=i686 < %s | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefixes=CHECK,X64
+;; Certain constraints should not be used for PIC. See asm-modifier-pic.ll for PIC-specific tests.
@var = internal global i32 0, align 4
@@ -8,10 +9,10 @@ define dso_local void @test_a() nounwind {
; CHECK-LABEL: test_a:
; CHECK: # %bb.0:
; CHECK-NEXT: #APP
-; CHECK-NEXT: #TEST var#
+; CHECK-NEXT: #TEST 42 var#
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret{{[l|q]}}
- tail call void asm sideeffect "#TEST ${0:c}#", "i"(ptr nonnull @var)
+ tail call void asm sideeffect "#TEST ${0:a} ${1:a}#", "i,i"(i32 42, ptr @var)
ret void
}
@@ -19,14 +20,10 @@ define dso_local void @test_c() nounwind {
; CHECK-LABEL: test_c:
; CHECK: # %bb.0:
; CHECK-NEXT: #APP
-; CHECK-NEXT: #TEST 42
-; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: #APP
-; CHECK-NEXT: #TEST var
+; CHECK-NEXT: #TEST 42 var#
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: ret{{[l|q]}}
- tail call void asm sideeffect "#TEST ${0:c}", "i"(i32 42)
- tail call void asm sideeffect "#TEST ${0:c}", "i"(ptr nonnull @var)
+ tail call void asm sideeffect "#TEST ${0:c} ${1:c}#", "i,i"(i32 42, ptr @var)
ret void
}
@@ -82,4 +79,3 @@ entry:
tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(ptr %p)
ret void
}
-
>From 01761a73e4d74bf1f84537b00fd89d7e75b71f5b Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <rampitec at users.noreply.github.com>
Date: Thu, 8 May 2025 00:42:35 -0700
Subject: [PATCH 108/115] [AMDGPU] Add missing intrinsic declaration to
intrinsics.ll. NFC. (#138954)
---
llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
index bb840023daf5d..1028cc9ebb342 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
@@ -329,6 +329,7 @@ bb:
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half>, <8 x half>, <4 x float>, i32 immarg, i32 immarg, i32 immarg)
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.f16(<8 x half>, <8 x half>, <16 x float>, i32 immarg, i32 immarg, i32 immarg)
+declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat>, <8 x bfloat>, <16 x float>, i32 immarg, i32 immarg, i32 immarg)
; CHECK: DIVERGENT: %result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 immarg 0, i32 immarg 0, i32 immarg 0)
define amdgpu_kernel void @mfma_f32_16x16x32_f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, ptr addrspace(1) %out) {
>From 5b8664fcb253fbd9eb0e83db3c1c0e19dd2499ef Mon Sep 17 00:00:00 2001
From: haonan <haonan.yang at intel.com>
Date: Thu, 8 May 2025 16:00:49 +0800
Subject: [PATCH 109/115] [InstCombine][foldPHIArgGEPIntoPHI] Early return for
const vector index for gep inst (#138661)
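Some context on why the scalar-only check missed this case (my reading of the
change, not wording from the patch): a constant vector index such as
`splat (i32 1)` is a Constant but not a ConstantInt, so the previous check let
the fold turn a constant GEP index into a phi-fed, variable index. A hedged
sketch of the widened guard, expressed as a hypothetical helper rather than
the actual InstCombine code:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper (illustration only): returns true when operand Op of
// either GEP is a constant (a scalar ConstantInt or a constant vector such as
// a splat), in which case the fold should bail out instead of replacing the
// constant index with a phi, i.e. a variable index.
static bool gepIndexShouldStayConstant(const GetElementPtrInst *FirstGEP,
                                       const GetElementPtrInst *OtherGEP,
                                       unsigned Op) {
  return isa<Constant>(FirstGEP->getOperand(Op)) ||
         isa<Constant>(OtherGEP->getOperand(Op));
}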
---
.../Transforms/InstCombine/InstCombinePHI.cpp | 4 +-
.../fold-phi-arg-gep-to-phi-negative.ll | 39 +++++++++++++++++++
2 files changed, 41 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 80308bf92dbbc..a842a5edcb8a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -574,8 +574,8 @@ Instruction *InstCombinerImpl::foldPHIArgGEPIntoPHI(PHINode &PN) {
// substantially cheaper to compute for the constants, so making it a
// variable index could pessimize the path. This also handles the case
// for struct indices, which must always be constant.
- if (isa<ConstantInt>(FirstInst->getOperand(Op)) ||
- isa<ConstantInt>(GEP->getOperand(Op)))
+ if (isa<Constant>(FirstInst->getOperand(Op)) ||
+ isa<Constant>(GEP->getOperand(Op)))
return nullptr;
if (FirstInst->getOperand(Op)->getType() !=
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
new file mode 100644
index 0000000000000..0bbb1035b1093
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+%foo = type { i16, i16, i16 }
+
+define <16 x ptr> @test(i1 %tobool) {
+; CHECK-LABEL: define <16 x ptr> @test(
+; CHECK-SAME: i1 [[TOBOOL:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LANE_0:%.*]] = alloca [[FOO:%.*]], align 16
+; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
+; CHECK: [[F0]]:
+; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
+; CHECK-NEXT: br label %[[MERGE:.*]]
+; CHECK: [[F1]]:
+; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
+; CHECK-NEXT: br label %[[MERGE]]
+; CHECK: [[MERGE]]:
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
+; CHECK-NEXT: ret <16 x ptr> [[VEC_PHI]]
+;
+entry:
+ %lane.0 = alloca %foo, align 16
+ %lane.15 = insertelement <16 x ptr> poison, ptr %lane.0, i64 0
+ %mm_vectorGEP = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
+ %mm_vectorGEP2 = getelementptr inbounds %foo, <16 x ptr> %lane.15, <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
+ br i1 %tobool, label %f1, label %f0
+
+f0:
+ br label %merge
+
+f1:
+ br label %merge
+
+merge:
+ %vec.phi = phi <16 x ptr> [ %mm_vectorGEP, %f0], [ %mm_vectorGEP2, %f1 ]
+ ret <16 x ptr> %vec.phi
+}
>From 2b140932880db4d7a220b1b76eff4eec15066c58 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Thu, 8 May 2025 10:40:15 +0200
Subject: [PATCH 110/115] [CodeGen] Parse nusw flag (#138856)
Fixes #127781
---
llvm/lib/CodeGen/MIRParser/MIParser.cpp | 3 +++
llvm/test/CodeGen/MIR/AMDGPU/ptradd-flags.mir | 23 +++++++++++++++++++
2 files changed, 26 insertions(+)
create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/ptradd-flags.mir
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 5c8e32d11cfb0..858cdb6599c62 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1476,6 +1476,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_unpredictable) ||
Token.is(MIToken::kw_nneg) ||
Token.is(MIToken::kw_disjoint) ||
+ Token.is(MIToken::kw_nusw) ||
Token.is(MIToken::kw_samesign)) {
// clang-format on
// Mine frame and fast math flags
@@ -1513,6 +1514,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NonNeg;
if (Token.is(MIToken::kw_disjoint))
Flags |= MachineInstr::Disjoint;
+ if (Token.is(MIToken::kw_nusw))
+ Flags |= MachineInstr::NoUSWrap;
if (Token.is(MIToken::kw_samesign))
Flags |= MachineInstr::SameSign;
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/ptradd-flags.mir b/llvm/test/CodeGen/MIR/AMDGPU/ptradd-flags.mir
new file mode 100644
index 0000000000000..5c14cafbd7a87
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/ptradd-flags.mir
@@ -0,0 +1,23 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=none %s -o - | FileCheck %s
+
+---
+name: nusw_ptr_add
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; CHECK-LABEL: name: nusw_ptr_add
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %off:_(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: %ret:_(p0) = nuw nusw G_PTR_ADD %ptr, %off(s64)
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %ret(p0)
+ %ptr:_(p0) = COPY $vgpr0_vgpr1
+ %off:_(s64) = COPY $vgpr2_vgpr3
+ %ret:_(p0) = nuw nusw G_PTR_ADD %ptr, %off
+ $vgpr0_vgpr1 = COPY %ret
+...
>From 2668167e2cf935528f7d93cb3b12a651a29e52f6 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Thu, 8 May 2025 08:47:12 +0000
Subject: [PATCH 111/115] [lldb] Disable some lldb-dap tests on Windows
Since https://github.com/llvm/llvm-project/pull/138981 / https://github.com/llvm/llvm-project/commit/aeeb9a3c09f40f42a1e8e5e3c8dbde3b260744bd
landed and the tests were re-enabled, these tests have been failing
on our Windows on Arm bot:
https://lab.llvm.org/buildbot/#/builders/141/builds/8523
********************
Unresolved Tests (1):
lldb-api :: tools/lldb-dap/send-event/TestDAP_sendEvent.py
********************
Failed Tests (2):
lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py
lldb-api :: tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
---
lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py | 1 +
lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py | 2 ++
lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py | 2 ++
3 files changed, 5 insertions(+)
diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
index e8e9181f8da8d..acbe0366d1ecc 100644
--- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
+++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -546,6 +546,7 @@ def test_terminate_commands(self):
)
self.verify_commands("terminateCommands", output, terminateCommands)
+ @skipIfWindows
def test_version(self):
"""
Tests that "initialize" response contains the "version" string the same
diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
index 64cec70aa923b..3e015186d4b81 100644
--- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
+++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py
@@ -10,6 +10,7 @@
class TestDAP_sendEvent(lldbdap_testcase.DAPTestCaseBase):
+ @skipIfWindows
def test_send_event(self):
"""
Test sending a custom event.
@@ -42,6 +43,7 @@ def test_send_event(self):
self.assertEqual(custom_event["event"], "my-custom-event")
self.assertEqual(custom_event["body"], custom_event_body)
+ @skipIfWindows
def test_send_internal_event(self):
"""
Test sending an internal event produces an error.
diff --git a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
index edf4adae14a3b..9c6f1d42feda2 100644
--- a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
+++ b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
@@ -201,6 +201,7 @@ def test_stackTrace(self):
0, len(stackFrames), "verify zero frames with startFrame out of bounds"
)
+ @skipIfWindows
def test_functionNameWithArgs(self):
"""
Test that the stack frame without a function name is given its pc in the response.
@@ -215,6 +216,7 @@ def test_functionNameWithArgs(self):
frame = self.get_stackFrames()[0]
self.assertEqual(frame["name"], "recurse(x=1)")
+ @skipIfWindows
def test_StackFrameFormat(self):
"""
Test the StackFrameFormat.
>From 2a32d738bb213a8a1e814b65beb61e39b7c66834 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 8 May 2025 10:08:49 +0100
Subject: [PATCH 112/115] [flang][OpenMP] fix predetermined privatization
inside section (#138159)
This now produces code equivalent to what would be generated with an explicit
private clause on the SECTIONS construct.
The problem was that each SECTION construct got its own DSP, which tried to
privatize the same symbol for that SECTION. Privatization for SECTION(S)
happens on the outer SECTIONS construct, so the outer construct's DSP should
be shared.
Fixes #135108
---
.../lib/Lower/OpenMP/DataSharingProcessor.cpp | 2 ++
flang/lib/Lower/OpenMP/OpenMP.cpp | 1 +
.../OpenMP/sections-predetermined-private.f90 | 34 +++++++++++++++++++
3 files changed, 37 insertions(+)
create mode 100644 flang/test/Lower/OpenMP/sections-predetermined-private.f90
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index b88454c45da85..7eec598645eac 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -67,6 +67,8 @@ void DataSharingProcessor::processStep1(
void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
// 'sections' lastprivate is handled by genOMP()
+ if (mlir::isa<mlir::omp::SectionOp>(op))
+ return;
if (!mlir::isa<mlir::omp::SectionsOp>(op)) {
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
copyLastPrivatize(op);
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index cc793c683f898..099d5c604060f 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2154,6 +2154,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
llvm::omp::Directive::OMPD_section)
.setClauses(§ionQueue.begin()->clauses)
+ .setDataSharingProcessor(&dsp)
.setEntryBlockArgs(&args),
sectionQueue, sectionQueue.begin());
}
diff --git a/flang/test/Lower/OpenMP/sections-predetermined-private.f90 b/flang/test/Lower/OpenMP/sections-predetermined-private.f90
new file mode 100644
index 0000000000000..9c2e2e127aa78
--- /dev/null
+++ b/flang/test/Lower/OpenMP/sections-predetermined-private.f90
@@ -0,0 +1,34 @@
+! RUN: %flang_fc1 -fopenmp -emit-hlfir -o - %s | FileCheck %s
+
+!$omp parallel sections
+!$omp section
+ do i = 1, 2
+ end do
+!$omp section
+ do i = 1, 2
+ end do
+!$omp end parallel sections
+end
+! CHECK-LABEL: func.func @_QQmain() {
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i", pinned}
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.sections {
+! CHECK: omp.section {
+! CHECK: %[[VAL_11:.*]]:2 = fir.do_loop %[[VAL_12:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}} -> (index, i32) {
+! CHECK: }
+! CHECK: fir.store %[[VAL_11]]#1 to %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.section {
+! CHECK: %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (index, i32) {
+! CHECK: }
+! CHECK: fir.store %[[VAL_25]]#1 to %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
>From 18f89283ebac87a153708b8fe00056f96b83022a Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton at arm.com>
Date: Thu, 8 May 2025 11:09:46 +0200
Subject: [PATCH 113/115] [mlir][tosa] Fix mul folder conformance to the spec
(#137601)
Change the folder for mul with a shift so that the rounding happens correctly
according to the spec pseudo-code.
Fixes:
https://discourse.llvm.org/t/tosa-mul-i32-shift-incorrect-result/86040
Partial cherry-pick from:
https://github.com/llvm/llvm-project/pull/128059
Co-authored-by: Tai Ly <tai.ly at arm.com>
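For a concrete feel of the change (my own worked example, not text from the
patch): the old fold logically right-shifted the raw product, whereas the spec
pseudo-code adds a rounding term of 1 << (shift - 1) before an arithmetic
right shift and requires the result to fit in i32. A small standalone sketch
in plain int64_t arithmetic rather than APInt:

#include <cstdint>
#include <iostream>
#include <optional>

// Spec-style mul-with-shift on 32-bit operands, using a 64-bit intermediate.
static std::optional<int64_t> mulWithShift(int32_t lhs, int32_t rhs,
                                           int32_t shift) {
  int64_t product = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
  if (shift > 0) {
    product += int64_t(1) << (shift - 1); // rounding term from the pseudo-code
    product >>= shift;                    // arithmetic shift of the signed product
    if (product < INT32_MIN || product > INT32_MAX)
      return std::nullopt;                // REQUIRE fails: the folder gives up
  }
  return product;
}

int main() {
  // Matches the @fold_mul_shift test below: -23661 * -33022 with shift 30
  // rounds to 1, while a plain logical shift of the raw product gives 0.
  if (std::optional<int64_t> r = mulWithShift(-23661, -33022, 30))
    std::cout << *r << "\n"; // prints 1
  return 0;
}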
---
.../Dialect/Tosa/IR/TosaCanonicalizations.cpp | 31 +++++++++++---
mlir/test/Dialect/Tosa/canonicalize.mlir | 41 ++++++++++++++++++-
2 files changed, 65 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
index 47368532df169..e73e2c4e33522 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -918,6 +918,27 @@ OpFoldResult IntDivOp::fold(FoldAdaptor adaptor) {
}
namespace {
+// calculate lhs * rhs >> shift according to TOSA Spec
+// return nullopt if result is not in range of int32_t when shift > 0
+std::optional<APInt> mulInt(APInt lhs, APInt rhs, int32_t shift,
+ unsigned bitwidth) {
+ APInt result = lhs.sext(64) * rhs.sext(64);
+
+ if (shift > 0) {
+ auto round = APInt(64, 1) << (shift - 1);
+ result += round;
+ result.ashrInPlace(shift);
+ // REQUIRE(product >= minimum_s<i32_t>() && product <= maximum_s<i32_t>())
+ if (!(result.getSExtValue() >= INT32_MIN &&
+ result.getSExtValue() <= INT32_MAX)) {
+ // REQUIRE failed
+ return std::nullopt;
+ }
+ }
+
+ return result.trunc(bitwidth);
+}
+
DenseElementsAttr mulBinaryFolder(DenseElementsAttr lhs, DenseElementsAttr rhs,
RankedTensorType ty, int32_t shift) {
if (rhs && lhs && rhs.isSplat() && lhs.isSplat()) {
@@ -930,12 +951,10 @@ DenseElementsAttr mulBinaryFolder(DenseElementsAttr lhs, DenseElementsAttr rhs,
}
auto bitwidth = ty.getElementType().getIntOrFloatBitWidth();
- l = l.sext(bitwidth * 2);
- r = r.sext(bitwidth * 2);
- auto result = l * r;
- result.lshrInPlace(shift);
- result = result.trunc(bitwidth);
- return DenseElementsAttr::get(ty, result);
+ const std::optional<APInt> result = mulInt(l, r, shift, bitwidth);
+ if (!result)
+ return {};
+ return DenseElementsAttr::get(ty, result.value());
}
if (llvm::isa<FloatType>(ty.getElementType())) {
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
index 59fd490330691..c98335cdafe65 100644
--- a/mlir/test/Dialect/Tosa/canonicalize.mlir
+++ b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -1226,4 +1226,43 @@ func.func @slice_dynamic_size_static_output_canonicalize(%arg0: tensor<2x60x59x?
%1 = tosa.const_shape {values = dense<[-1, 60, 58, -1]> : tensor<4xindex>} : () -> !tosa.shape<4>
%2 = tosa.slice %arg0, %0, %1 : (tensor<2x60x59x?xf32>, !tosa.shape<4>, !tosa.shape<4>) -> tensor<2x60x58x?xf32>
return %2 : tensor<2x60x58x?xf32>
- }
+}
+
+// -----
+
+// CHECK-LABEL: @fold_mul_shift
+// CHECK-DAG: "tosa.const"() <{values = dense<1> : tensor<i32>}> : () -> tensor<i32>
+func.func @fold_mul_shift() -> tensor<i32> {
+ %0 = "tosa.const"() <{values = dense<-23661> : tensor<i32>}> : () -> tensor<i32>
+ %1 = "tosa.const"() <{values = dense<-33022> : tensor<i32>}> : () -> tensor<i32>
+ %2 = "tosa.const"() <{values = dense<30> : tensor<1xi8>}> : () -> tensor<1xi8>
+ %3 = tosa.mul %0, %1, %2 : (tensor<i32>, tensor<i32>, tensor<1xi8>) -> tensor<i32>
+ return %3 : tensor<i32>
+}
+
+// -----
+
+// CHECK-LABEL: @fold_mul_no_shift
+// CHECK-DAG: "tosa.const"() <{values = dense<781333542> : tensor<i32>}> : () -> tensor<i32>
+func.func @fold_mul_no_shift() -> tensor<i32> {
+ %0 = "tosa.const"() <{values = dense<-23661> : tensor<i32>}> : () -> tensor<i32>
+ %1 = "tosa.const"() <{values = dense<-33022> : tensor<i32>}> : () -> tensor<i32>
+ %2 = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
+ %3 = tosa.mul %0, %1, %2 : (tensor<i32>, tensor<i32>, tensor<1xi8>) -> tensor<i32>
+ return %3 : tensor<i32>
+}
+
+// -----
+
+// CHECK-LABEL: @no_fold_mul_result_exceeds_i32
+// CHECK-DAG: %[[LHS:.*]] = "tosa.const"() <{values = dense<23661> : tensor<i32>}> : () -> tensor<i32>
+// CHECK-DAG: %[[RHS:.*]] = "tosa.const"() <{values = dense<330222> : tensor<i32>}> : () -> tensor<i32>
+// CHECK-DAG: %[[SHIFT:.*]] = "tosa.const"() <{values = dense<1> : tensor<1xi8>}> : () -> tensor<1xi8>
+// CHECK: tosa.mul %[[LHS]], %[[RHS]], %[[SHIFT]] : (tensor<i32>, tensor<i32>, tensor<1xi8>) -> tensor<i32>
+func.func @no_fold_mul_result_exceeds_i32() -> tensor<i32> {
+ %0 = "tosa.const"() <{values = dense<23661> : tensor<i32>}> : () -> tensor<i32>
+ %1 = "tosa.const"() <{values = dense<330222> : tensor<i32>}> : () -> tensor<i32>
+ %2 = "tosa.const"() <{values = dense<1> : tensor<1xi8>}> : () -> tensor<1xi8>
+ %3 = tosa.mul %0, %1, %2 : (tensor<i32>, tensor<i32>, tensor<1xi8>) -> tensor<i32>
+ return %3 : tensor<i32>
+}
>From fc2ec06ccab498447914c076f1e7b4326dc321c2 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Thu, 8 May 2025 10:40:21 +0100
Subject: [PATCH 114/115] [KeyInstr] Add MIR parser support (#133494)
RFC: https://discourse.llvm.org/t/rfc-improving-is-stmt-placement-for-better-interactive-debugging/82668
---
llvm/lib/CodeGen/MIRParser/MIParser.cpp | 26 ++++++++++++-
.../DebugInfo/KeyInstructions/X86/parse.mir | 39 +++++++++++++++++++
2 files changed, 64 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/DebugInfo/KeyInstructions/X86/parse.mir
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 858cdb6599c62..cbdeacda3eb87 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -2332,6 +2332,8 @@ bool MIParser::parseDILocation(MDNode *&Loc) {
MDNode *Scope = nullptr;
MDNode *InlinedAt = nullptr;
bool ImplicitCode = false;
+ uint64_t AtomGroup = 0;
+ uint64_t AtomRank = 0;
if (expectAndConsume(MIToken::lparen))
return true;
@@ -2406,6 +2408,28 @@ bool MIParser::parseDILocation(MDNode *&Loc) {
lex();
continue;
}
+ if (Token.stringValue() == "atomGroup") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ AtomGroup = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
+ if (Token.stringValue() == "atomRank") {
+ lex();
+ if (expectAndConsume(MIToken::colon))
+ return true;
+ if (Token.isNot(MIToken::IntegerLiteral) ||
+ Token.integerValue().isSigned())
+ return error("expected unsigned integer");
+ AtomRank = Token.integerValue().getZExtValue();
+ lex();
+ continue;
+ }
}
return error(Twine("invalid DILocation argument '") +
Token.stringValue() + "'");
@@ -2421,7 +2445,7 @@ bool MIParser::parseDILocation(MDNode *&Loc) {
return error("DILocation requires a scope");
Loc = DILocation::get(MF.getFunction().getContext(), Line, Column, Scope,
- InlinedAt, ImplicitCode);
+ InlinedAt, ImplicitCode, AtomGroup, AtomRank);
return false;
}
diff --git a/llvm/test/DebugInfo/KeyInstructions/X86/parse.mir b/llvm/test/DebugInfo/KeyInstructions/X86/parse.mir
new file mode 100644
index 0000000000000..45cc23831412c
--- /dev/null
+++ b/llvm/test/DebugInfo/KeyInstructions/X86/parse.mir
@@ -0,0 +1,39 @@
+# RUN: llc %s --run-pass=none -o - | FileCheck %s
+
+## Check the MIR parser understands atomGroup and atomRank.
+
+# CHECK: RET64 $eax, debug-location !DILocation(line: 2, scope: ![[#]], atomGroup: 1, atomRank: 2)
+
+--- |
+ target triple = "x86_64-unknown-linux-gnu"
+ define hidden noundef i32 @p() local_unnamed_addr !dbg !5 {
+ entry:
+ ret i32 0
+ }
+
+ declare void @_Z12prologue_endv() local_unnamed_addr
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!2, !3}
+ !llvm.ident = !{!4}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_17, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None)
+ !1 = !DIFile(filename: "test.cpp", directory: "/")
+ !2 = !{i32 7, !"Dwarf Version", i32 5}
+ !3 = !{i32 2, !"Debug Info Version", i32 3}
+ !4 = !{!"clang version 19.0.0"}
+ !5 = distinct !DISubprogram(name: "p", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{}
+
+...
+---
+name: p
+alignment: 16
+body: |
+ bb.0.entry:
+ liveins: $edx, $esi, $rbp, $rbx
+ renamable $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags
+ RET64 $eax, debug-location !DILocation(line: 2, scope: !5, atomGroup: 1, atomRank: 2)
+
+...
>From 38b230aee385bd6ea9ae59b2e9790ba9c6e42ff2 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Wed, 26 Mar 2025 17:27:27 +0000
Subject: [PATCH 115/115] Propagate DebugLocs on phis in BreakCriticalEdges
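A short note on the intent, since the subject is terse (my summary, not part
of the commit message): the PHI cloned into the new target block should keep
the original PHI's source location, and the PHI that merges the direct and
indirect predecessors should carry a location merged from both incoming PHIs,
since it stands in for both. A hedged sketch of the propagation pattern as a
standalone helper (the actual change is the two calls added in the diff below,
not a new function):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative helper only: applies the DebugLoc propagation this patch adds
// to the two PHIs created when splitting an indirectbr critical edge.
static void propagatePhiDebugLocs(PHINode *IndPHI, PHINode *DirPHI,
                                  PHINode *NewIndPHI, PHINode *MergePHI) {
  // The clone in the new target block keeps the original PHI's location.
  NewIndPHI->setDebugLoc(IndPHI->getDebugLoc());
  // The merge PHI represents both incoming PHIs, so give it a merged location
  // (a common location where possible, otherwise an artificial one).
  MergePHI->applyMergedLocation(DirPHI->getDebugLoc(), IndPHI->getDebugLoc());
}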
---
.../Transforms/Utils/BreakCriticalEdges.cpp | 3 ++
.../CodeGenPrepare/X86/split-dbg.ll | 49 +++++++++++++++++++
2 files changed, 52 insertions(+)
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/split-dbg.ll
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 0721358eb03bb..aea47aec7bb80 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -454,6 +454,7 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", InsertPt);
NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
IBRPred);
+ NewIndPHI->setDebugLoc(IndPHI->getDebugLoc());
// Create a PHI in the body block, to merge the direct and indirect
// predecessors.
@@ -461,6 +462,8 @@ bool llvm::SplitIndirectBrCriticalEdges(Function &F,
MergePHI->insertBefore(MergeInsert);
MergePHI->addIncoming(NewIndPHI, Target);
MergePHI->addIncoming(DirPHI, DirectSucc);
+ MergePHI->applyMergedLocation(DirPHI->getDebugLoc(),
+ IndPHI->getDebugLoc());
IndPHI->replaceAllUsesWith(MergePHI);
IndPHI->eraseFromParent();
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/split-dbg.ll b/llvm/test/Transforms/CodeGenPrepare/X86/split-dbg.ll
new file mode 100644
index 0000000000000..773ae3ff9a3e5
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/split-dbg.ll
@@ -0,0 +1,49 @@
+; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S -mtriple=x86_64 < %s \
+; RUN: | FileCheck %s
+
+;; Check debug locations are propagated onto new PHIs.
+
+; CHECK: .split:
+; CHECK-NEXT: %merge = phi i32 [ poison, %while.body ], [ %dest.sroa.clone, %while.body.clone ], !dbg [[DBG:!.*]]
+
+; CHECK: while.body.clone:
+; CHECK-NEXT: %dest.sroa.clone = phi i32 [ %1, %.split ], [ poison, %if.else ], !dbg [[DBG]]
+
+define void @test(i1 %c) !dbg !5 {
+entry:
+ br label %if.else
+
+if.else: ; preds = %if.else1, %entry
+ br i1 %c, label %while.body, label %preheader
+
+preheader: ; preds = %if.else
+ br label %if.else1
+
+if.then: ; preds = %if.else1
+ unreachable
+
+while.body: ; preds = %if.else1, %while.body, %if.else
+ %dest.sroa = phi i32 [ %1, %while.body ], [ poison, %if.else1 ], [ poison, %if.else ], !dbg !8
+ %0 = inttoptr i32 %dest.sroa to ptr
+ %incdec.ptr = getelementptr inbounds i8, ptr %0, i32 -1
+ %1 = ptrtoint ptr %incdec.ptr to i32
+ store i8 0, ptr %incdec.ptr, align 1
+ br label %while.body
+
+if.else1: ; preds = %if.else1, %preheader
+ indirectbr ptr poison, [label %if.then, label %while.body, label %if.else, label %if.else1]
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.ll", directory: "/")
+!2 = !{i32 11}
+!3 = !{i32 0}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "test", linkageName: "test", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 1, column: 1, scope: !5)