[llvm-branch-commits] [clang] [flang] [libc] [lldb] [llvm] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` (PR #139185)

Thu May 8 21:33:28 PDT 2025

https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/139185

>From 6bb30196912daeaa92babc39519b2ae0bfce9771 Mon Sep 17 00:00:00 2001
From: jimingham <jingham at apple.com>
Date: Thu, 8 May 2025 16:22:39 -0700
Subject: [PATCH 01/25] Branch island debug (#139166)

This patch allows lldb to step in across "branch islands", which are the
Darwin linker's way of dealing with immediate branches to targets that
are too far away for the immediate slot to make the jump.

I submitted this a couple of days ago and it failed on the arm64 bot. I was
able to match the bot's OS and tool versions (they are a bit old at this
point) and ran the test there, but sadly it succeeded. The x86_64 bot
also failed, but that was my bad: I used @skipUnlessDarwin when I should
have used @skipUnlessAppleSilicon.

So this resubmission is with the proper decoration for the test, and
with a bunch of debug output printed in case of failure. With any luck,
if this resubmission fails again I'll be able to see what's going on.
---
 .../MacOSX-DYLD/DynamicLoaderDarwin.cpp       | 32 +++++++--
 lldb/test/API/macosx/branch-islands/Makefile  | 16 +++++
 .../branch-islands/TestBranchIslands.py       | 65 +++++++++++++++++++
 lldb/test/API/macosx/branch-islands/foo.c     |  6 ++
 lldb/test/API/macosx/branch-islands/main.c    |  6 ++
 .../test/API/macosx/branch-islands/padding1.s |  3 +
 .../test/API/macosx/branch-islands/padding2.s |  3 +
 .../test/API/macosx/branch-islands/padding3.s |  3 +
 .../test/API/macosx/branch-islands/padding4.s |  3 +
 9 files changed, 130 insertions(+), 7 deletions(-)
 create mode 100644 lldb/test/API/macosx/branch-islands/Makefile
 create mode 100644 lldb/test/API/macosx/branch-islands/TestBranchIslands.py
 create mode 100644 lldb/test/API/macosx/branch-islands/foo.c
 create mode 100644 lldb/test/API/macosx/branch-islands/main.c
 create mode 100644 lldb/test/API/macosx/branch-islands/padding1.s
 create mode 100644 lldb/test/API/macosx/branch-islands/padding2.s
 create mode 100644 lldb/test/API/macosx/branch-islands/padding3.s
 create mode 100644 lldb/test/API/macosx/branch-islands/padding4.s

diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index e25c4ff55e408..578ab12268ea3 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -26,6 +26,7 @@
 #include "lldb/Target/Thread.h"
 #include "lldb/Target/ThreadPlanCallFunction.h"
 #include "lldb/Target/ThreadPlanRunToAddress.h"
+#include "lldb/Target/ThreadPlanStepInstruction.h"
 #include "lldb/Utility/DataBuffer.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "lldb/Utility/LLDBLog.h"
@@ -923,15 +924,15 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
   if (current_symbol != nullptr) {
     std::vector<Address> addresses;
 
+    ConstString current_name =
+        current_symbol->GetMangled().GetName(Mangled::ePreferMangled);
     if (current_symbol->IsTrampoline()) {
-      ConstString trampoline_name =
-          current_symbol->GetMangled().GetName(Mangled::ePreferMangled);
 
-      if (trampoline_name) {
+      if (current_name) {
         const ModuleList &images = target_sp->GetImages();
 
         SymbolContextList code_symbols;
-        images.FindSymbolsWithNameAndType(trampoline_name, eSymbolTypeCode,
+        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeCode,
                                           code_symbols);
         for (const SymbolContext &context : code_symbols) {
           Address addr = context.GetFunctionOrSymbolAddress();
@@ -945,8 +946,8 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
         }
 
         SymbolContextList reexported_symbols;
-        images.FindSymbolsWithNameAndType(
-            trampoline_name, eSymbolTypeReExported, reexported_symbols);
+        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeReExported,
+                                          reexported_symbols);
         for (const SymbolContext &context : reexported_symbols) {
           if (context.symbol) {
             Symbol *actual_symbol =
@@ -968,7 +969,7 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
         }
 
         SymbolContextList indirect_symbols;
-        images.FindSymbolsWithNameAndType(trampoline_name, eSymbolTypeResolver,
+        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeResolver,
                                           indirect_symbols);
 
         for (const SymbolContext &context : indirect_symbols) {
@@ -1028,6 +1029,23 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
       thread_plan_sp = std::make_shared<ThreadPlanRunToAddress>(
           thread, load_addrs, stop_others);
     }
+    // One more case we have to consider is "branch islands".  These are regular
+    // TEXT symbols but their names end in .island plus maybe a .digit suffix.
+    // They are to allow arm64 code to branch further than the size of the
+    // address slot allows.  We just need to single-instruction step in that
+    // case.
+    static const char *g_branch_island_pattern = "\\.island\\.?[0-9]*$";
+    static RegularExpression g_branch_island_regex(g_branch_island_pattern);
+
+    bool is_branch_island = g_branch_island_regex.Execute(current_name);
+    if (!thread_plan_sp && is_branch_island) {
+      thread_plan_sp = std::make_shared<ThreadPlanStepInstruction>(
+          thread,
+          /* step_over= */ false, /* stop_others */ false, eVoteNoOpinion,
+          eVoteNoOpinion);
+      LLDB_LOG(log, "Stepping one instruction over branch island: '{0}'.",
+               current_name);
+    }
   } else {
     LLDB_LOGF(log, "Could not find symbol for step through.");
   }
diff --git a/lldb/test/API/macosx/branch-islands/Makefile b/lldb/test/API/macosx/branch-islands/Makefile
new file mode 100644
index 0000000000000..062e947f6d6ee
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/Makefile
@@ -0,0 +1,16 @@
+C_SOURCES := main.c foo.c
+CFLAGS_EXTRAS := -std=c99
+
+include Makefile.rules
+
+a.out: main.o padding1.o padding2.o padding3.o padding4.o foo.o
+	${CC} ${LDFLAGS} foo.o padding1.o padding2.o padding3.o padding4.o main.o -o a.out
+
+%.o: $(SRCDIR)/%.s
+	${CC} -c $<
+
+#padding1.o: padding1.s
+#	${CC} -c $(SRCDIR)/padding1.s
+
+#padding2.o: padding2.s
+#	${CC} -c $(SRCDIR)/padding2.s
diff --git a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
new file mode 100644
index 0000000000000..c79840b400432
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
@@ -0,0 +1,65 @@
+"""
+Make sure that we can step in across an arm64 branch island
+"""
+
+
+import lldb
+import lldbsuite.test.lldbutil as lldbutil
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+
+
+class TestBranchIslandStepping(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    @skipUnlessAppleSilicon
+    def test_step_in_branch_island(self):
+        """Make sure we can step in across a branch island"""
+        self.build()
+        self.main_source_file = lldb.SBFileSpec("main.c")
+        self.do_test()
+
+    def do_test(self):
+        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "Set a breakpoint here", self.main_source_file
+        )
+
+        # Make sure that we did manage to generate a branch island for foo:
+        syms = target.FindSymbols("foo.island", lldb.eSymbolTypeCode)
+        self.assertEqual(len(syms), 1, "We did generate an island for foo")
+
+        # Gathering some info to dump in case of failure:
+        trace_before = lldbutil.print_stacktrace(thread, True)
+        func_before = thread.frames[0].function
+
+        thread.StepInto()
+        stop_frame = thread.frames[0]
+        # This is failing on the bot, but I can't reproduce the failure
+        # locally.  Let's see if we can dump some more info here to help
+        # figure out what went wrong...
+        if stop_frame.name.find("foo") == -1:
+            stream = lldb.SBStream()
+            print("Branch island symbols: ")
+            syms[0].GetDescription(stream)
+            for i in range(0, 6):
+                for sep in ["", "."]:
+                    syms = target.FindSymbols(
+                        f"foo.island{sep}{i}", lldb.eSymbolTypeCode
+                    )
+                    if len(syms) > 0:
+                        stream.Print("\n")
+                        syms[0].GetDescription(stream)
+
+            print(stream.GetData())
+            print(f"Start backtrace:")
+            print(trace_before)
+            print(f"\n'main' disassembly:\n{lldbutil.disassemble(target, func_before)}")
+            print("\nEnd backtrace:\n")
+            lldbutil.print_stacktrace(thread)
+            print(
+                f"\nStop disassembly:\n {lldbutil.disassemble(target, stop_frame.function)}"
+            )
+
+        self.assertIn("foo", stop_frame.name, "Stepped into foo")
+        var = stop_frame.FindVariable("a_variable_in_foo")
+        self.assertTrue(var.IsValid(), "Found the variable in foo")
diff --git a/lldb/test/API/macosx/branch-islands/foo.c b/lldb/test/API/macosx/branch-islands/foo.c
new file mode 100644
index 0000000000000..a5dd2e59e1d82
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/foo.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+void foo() {
+  int a_variable_in_foo = 10;
+  printf("I am foo: %d.\n", a_variable_in_foo);
+}
diff --git a/lldb/test/API/macosx/branch-islands/main.c b/lldb/test/API/macosx/branch-islands/main.c
new file mode 100644
index 0000000000000..b5578bdd715df
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/main.c
@@ -0,0 +1,6 @@
+extern void foo();
+
+int main() {
+  foo(); // Set a breakpoint here
+  return 0;
+}
diff --git a/lldb/test/API/macosx/branch-islands/padding1.s b/lldb/test/API/macosx/branch-islands/padding1.s
new file mode 100644
index 0000000000000..4911e53b0240d
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/padding1.s
@@ -0,0 +1,3 @@
+.text
+_padding1:
+.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding2.s b/lldb/test/API/macosx/branch-islands/padding2.s
new file mode 100644
index 0000000000000..5ad1bad11263b
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/padding2.s
@@ -0,0 +1,3 @@
+.text
+_padding2:
+.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding3.s b/lldb/test/API/macosx/branch-islands/padding3.s
new file mode 100644
index 0000000000000..9f614eecf56d9
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/padding3.s
@@ -0,0 +1,3 @@
+.text
+_padding3:
+.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding4.s b/lldb/test/API/macosx/branch-islands/padding4.s
new file mode 100644
index 0000000000000..12896cf5e5b8e
--- /dev/null
+++ b/lldb/test/API/macosx/branch-islands/padding4.s
@@ -0,0 +1,3 @@
+.text
+_padding4:
+.space 120*1024*1024

>From a25d38ddb74d6f7789daf2e7ead416054d2c6cd8 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 8 May 2025 16:26:23 -0700
Subject: [PATCH 02/25] [RISCV] Correct the SDTypeProfile for
 RISCVISD::PROBED_ALLOCA (#139135)

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  2 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.td     | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9c7aedf5acaf2..c726fa74b922e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24502,7 +24502,7 @@ RISCVTargetLowering::emitDynamicProbedAlloc(MachineInstr &MI,
   MachineFunction &MF = *MBB->getParent();
   MachineBasicBlock::iterator MBBI = MI.getIterator();
   DebugLoc DL = MBB->findDebugLoc(MBBI);
-  Register TargetReg = MI.getOperand(1).getReg();
+  Register TargetReg = MI.getOperand(0).getReg();
 
   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
   bool IsRV64 = Subtarget.is64Bit();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 4a4290483e94b..91903a9ea1f78 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -103,8 +103,7 @@ def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL",
                                                   SDTCisInt<0>]>>;
 
 def riscv_probed_alloca : SDNode<"RISCVISD::PROBED_ALLOCA",
-                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
-                                                      SDTCisVT<0, i32>]>,
+                                 SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPMayStore]>;
 
 //===----------------------------------------------------------------------===//
@@ -1456,17 +1455,17 @@ let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
 // Probed stack allocation of a constant size, used in function prologues when
 // stack-clash protection is enabled.
 def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
-                               (ins GPR:$scratch),
+                               (ins GPR:$target),
                                []>,
                                Sched<[]>;
 def PROBED_STACKALLOC_RVV : Pseudo<(outs GPR:$sp),
-                               (ins GPR:$scratch),
+                               (ins GPR:$target),
                                []>,
                                Sched<[]>;
 let usesCustomInserter = 1 in
-def PROBED_STACKALLOC_DYN : Pseudo<(outs GPR:$rd),
-                               (ins GPR:$scratch),
-                               [(set GPR:$rd, (riscv_probed_alloca GPR:$scratch))]>,
+def PROBED_STACKALLOC_DYN : Pseudo<(outs),
+                               (ins GPR:$target),
+                               [(riscv_probed_alloca GPR:$target)]>,
                                Sched<[]>;
 }
 

>From e4b4a939f8852b2d5aab0d5b999890c7ef85be18 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 8 May 2025 16:27:08 -0700
Subject: [PATCH 03/25] [MCP] Disable BackwardCopyPropagateBlock for copies
 with implicit registers. (#137687)

If there's an implicit-def of a super register, the propagation
must preserve this implicit-def. Knowing how and when to do this
may require target specific knowledge so just disable it for now.

Prior to 2def1c4, we checked that the copy had explicit 2 operands;
when that was removed, we started allowing implicit operands through.
This patch adds a check for implicit operands, but still allows
extra explicit operands, which was the goal of 2def1c4.

Fixes #131478.
---
 llvm/lib/CodeGen/MachineCopyPropagation.cpp |  2 +-
 llvm/test/CodeGen/SystemZ/pr137687.mir      | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/pr137687.mir

diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 224588b9d52ed..6eab87c1292e0 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1206,7 +1206,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
     // Ignore non-trivial COPYs.
     std::optional<DestSourcePair> CopyOperands =
         isCopyInstr(MI, *TII, UseCopyInstr);
-    if (CopyOperands) {
+    if (CopyOperands && MI.getNumImplicitOperands() == 0) {
       Register DefReg = CopyOperands->Destination->getReg();
       Register SrcReg = CopyOperands->Source->getReg();
 
diff --git a/llvm/test/CodeGen/SystemZ/pr137687.mir b/llvm/test/CodeGen/SystemZ/pr137687.mir
new file mode 100644
index 0000000000000..66038f9dce22f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/pr137687.mir
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc %s -mtriple=s390x-unknown-linux-gnu -run-pass=machine-cp -verify-machineinstrs -o - | FileCheck %s
+
+---
+name:            t
+tracksRegLiveness: true
+noPhis:          true
+isSSA:           false
+noVRegs:         true
+hasFakeUses:     false
+tracksDebugUserValues: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: t
+    ; CHECK: renamable $r14d = LLILL 0
+    ; CHECK-NEXT: renamable $r12d = COPY killed renamable $r14d, implicit-def $r12q
+    ; CHECK-NEXT: Return implicit $r12q
+    renamable $r14d = LLILL 0
+    renamable $r12d = COPY killed renamable $r14d, implicit-def $r12q
+    Return implicit $r12q
+...

>From 501dcab68ea978d8a3dbc46a8030b9227c4720de Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 8 May 2025 16:27:36 -0700
Subject: [PATCH 04/25] [RISCV] Limit VLEN in getOptimalMemOpType to prevent
 creating invalid MVTs. (#139116)

We only guarantee that types that are 1024 bytes or smaller exist in the
MVT enum.

Fixes #139075.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  5 ++++-
 llvm/test/CodeGen/RISCV/rvv/pr139075.ll     | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/pr139075.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c726fa74b922e..8403b51483323 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -23380,7 +23380,10 @@ EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
   // combining will typically form larger LMUL operations from the LMUL1
   // operations emitted here, and that's okay because combining isn't
   // introducing new memory operations; it's just merging existing ones.
-  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
+  // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
+  const unsigned MinVLenInBytes =
+      std::min(Subtarget.getRealMinVLen() / 8, 1024U);
+
   if (Op.size() < MinVLenInBytes)
     // TODO: Figure out short memops.  For the moment, do the default thing
     // which ends up using scalar sequences.
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr139075.ll b/llvm/test/CodeGen/RISCV/rvv/pr139075.ll
new file mode 100644
index 0000000000000..33e8e13a21588
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr139075.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvl16384b | FileCheck %s
+
+define void @a(ptr %0, ptr %1) {
+; CHECK-LABEL: a:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, 1024
+; CHECK-NEXT:    vle8.v v8, (a1)
+; CHECK-NEXT:    addi a0, a0, 1024
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
+  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 4 %1, i64 2048, i1 false)
+  ret void
+}

>From 02139b140beff69edc376bd15a80e0670322085f Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Fri, 9 May 2025 07:40:02 +0800
Subject: [PATCH 05/25] MIPSr6: Set FMAXNUM and FMINNUM as Legal (#139009)

We now define FMAXNUM and FMINNUM as IEEE754-2008 with +0.0 > -0.0.
MIPSr6's fmax/fmin follow these rules fully.

FMAXNUM_IEEE and FMINNUM_IEEE will be removed in the future once:
1. FMAXNUM/FMINNUM are fixed for all targets.
2. FMAXNUM_IEEE/FMINNUM_IEEE are no longer used by the middle end.
---
 llvm/lib/Target/Mips/Mips32r6InstrInfo.td  |  12 ++
 llvm/lib/Target/Mips/MipsISelLowering.cpp  |   8 +-
 llvm/test/CodeGen/Mips/mipsr6-minmaxnum.ll |  85 ++++-----
 llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll  | 202 ++++++++++++---------
 4 files changed, 170 insertions(+), 137 deletions(-)

diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 27b9ce60ba826..fead376b8c338 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1122,15 +1122,27 @@ let AdditionalPredicates = [NotInMicroMips] in {
   def : MipsPat<(fmaxnum_ieee f32:$lhs, f32:$rhs),
                 (MAX_S   f32:$lhs, f32:$rhs)>,
                 ISA_MIPS32R6;
+  def : MipsPat<(fmaxnum f32:$lhs, f32:$rhs),
+                (MAX_S   f32:$lhs, f32:$rhs)>,
+                ISA_MIPS32R6;
   def : MipsPat<(fmaxnum_ieee f64:$lhs, f64:$rhs),
                 (MAX_D   f64:$lhs, f64:$rhs)>,
                 ISA_MIPS32R6;
+  def : MipsPat<(fmaxnum f64:$lhs, f64:$rhs),
+                (MAX_D   f64:$lhs, f64:$rhs)>,
+                ISA_MIPS32R6;
   def : MipsPat<(fminnum_ieee f32:$lhs, f32:$rhs),
                 (MIN_S   f32:$lhs, f32:$rhs)>,
                 ISA_MIPS32R6;
+  def : MipsPat<(fminnum f32:$lhs, f32:$rhs),
+                (MIN_S   f32:$lhs, f32:$rhs)>,
+                ISA_MIPS32R6;
   def : MipsPat<(fminnum_ieee f64:$lhs, f64:$rhs),
                 (MIN_D   f64:$lhs, f64:$rhs)>,
                 ISA_MIPS32R6;
+  def : MipsPat<(fminnum f64:$lhs, f64:$rhs),
+                (MIN_D   f64:$lhs, f64:$rhs)>,
+                ISA_MIPS32R6;
   def : MipsPat<(f32 (fcanonicalize f32:$src)),
                 (MIN_S   f32:$src, f32:$src)>,
                 ISA_MIPS32R6;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 72f21a0d08b3c..66cbf79a453a6 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -365,12 +365,12 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
   if (Subtarget.hasMips32r6()) {
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
+    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
-    setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
+    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
   } else {
diff --git a/llvm/test/CodeGen/Mips/mipsr6-minmaxnum.ll b/llvm/test/CodeGen/Mips/mipsr6-minmaxnum.ll
index 2a0ad07474c09..4c337392a7a26 100644
--- a/llvm/test/CodeGen/Mips/mipsr6-minmaxnum.ll
+++ b/llvm/test/CodeGen/Mips/mipsr6-minmaxnum.ll
@@ -1,80 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc %s -mtriple=mipsisa32r6el-linux-gnu -o - | \
 ; RUN:     FileCheck %s --check-prefix=MIPS32R6EL
 ; RUN: llc %s -mtriple=mipsisa64r6el-linux-gnuabi64 -o - | \
 ; RUN:     FileCheck %s --check-prefix=MIPS64R6EL
 
 define float @mins(float %x, float %y) {
-; MIPS32R6EL-LABEL:	mins
-; MIPS32R6EL:		# %bb.0:
-; MIPS32R6EL-NEXT:	min.s	$f0, $f14, $f14
-; MIPS32R6EL-NEXT:	min.s	$f1, $f12, $f12
-; MIPS32R6EL-NEXT:	jr	$ra
-; MIPS32R6EL-NEXT:	min.s	$f0, $f1, $f0
+; MIPS32R6EL-LABEL: mins:
+; MIPS32R6EL:       # %bb.0:
+; MIPS32R6EL-NEXT:    jr $ra
+; MIPS32R6EL-NEXT:    min.s $f0, $f12, $f14
 ;
-; MIPS64R6EL-LABEL:	mins
-; MIPS64R6EL:		# %bb.0:
-; MIPS64R6EL-NEXT:	min.s	$f0, $f13, $f13
-; MIPS64R6EL-NEXT:	min.s	$f1, $f12, $f12
-; MIPS64R6EL-NEXT:	jr	$ra
-; MIPS64R6EL-NEXT:	min.s	$f0, $f1, $f0
-
+; MIPS64R6EL-LABEL: mins:
+; MIPS64R6EL:       # %bb.0:
+; MIPS64R6EL-NEXT:    jr $ra
+; MIPS64R6EL-NEXT:    min.s $f0, $f12, $f13
   %r = tail call float @llvm.minnum.f32(float %x, float %y)
   ret float %r
 }
 
 define float @maxs(float %x, float %y) {
-; MIPS32R6EL-LABEL:	maxs
-; MIPS32R6EL:		# %bb.0:
-; MIPS32R6EL-NEXT:	min.s	$f0, $f14, $f14
-; MIPS32R6EL-NEXT:	min.s	$f1, $f12, $f12
-; MIPS32R6EL-NEXT:	jr	$ra
-; MIPS32R6EL-NEXT:	max.s	$f0, $f1, $f0
+; MIPS32R6EL-LABEL: maxs:
+; MIPS32R6EL:       # %bb.0:
+; MIPS32R6EL-NEXT:    jr $ra
+; MIPS32R6EL-NEXT:    max.s $f0, $f12, $f14
 ;
-; MIPS64R6EL-LABEL:	maxs
-; MIPS64R6EL:		# %bb.0:
-; MIPS64R6EL-NEXT:	min.s	$f0, $f13, $f13
-; MIPS64R6EL-NEXT:	min.s	$f1, $f12, $f12
-; MIPS64R6EL-NEXT:	jr	$ra
-; MIPS64R6EL-NEXT:	max.s	$f0, $f1, $f0
-
+; MIPS64R6EL-LABEL: maxs:
+; MIPS64R6EL:       # %bb.0:
+; MIPS64R6EL-NEXT:    jr $ra
+; MIPS64R6EL-NEXT:    max.s $f0, $f12, $f13
   %r = tail call float @llvm.maxnum.f32(float %x, float %y)
   ret float %r
 }
 
 define double @mind(double %x, double %y) {
-; MIPS32R6EL-LABEL:	mind
-; MIPS32R6EL:		# %bb.0:
-; MIPS32R6EL-NEXT:	min.d	$f0, $f14, $f14
-; MIPS32R6EL-NEXT:	min.d	$f1, $f12, $f12
-; MIPS32R6EL-NEXT:	jr	$ra
-; MIPS32R6EL-NEXT:	min.d	$f0, $f1, $f0
+; MIPS32R6EL-LABEL: mind:
+; MIPS32R6EL:       # %bb.0:
+; MIPS32R6EL-NEXT:    jr $ra
+; MIPS32R6EL-NEXT:    min.d $f0, $f12, $f14
 ;
-; MIPS64R6EL-LABEL:	mind
-; MIPS64R6EL:		# %bb.0:
-; MIPS64R6EL-NEXT:	min.d	$f0, $f13, $f13
-; MIPS64R6EL-NEXT:	min.d	$f1, $f12, $f12
-; MIPS64R6EL-NEXT:	jr	$ra
-; MIPS64R6EL-NEXT:	min.d	$f0, $f1, $f0
-
+; MIPS64R6EL-LABEL: mind:
+; MIPS64R6EL:       # %bb.0:
+; MIPS64R6EL-NEXT:    jr $ra
+; MIPS64R6EL-NEXT:    min.d $f0, $f12, $f13
   %r = tail call double @llvm.minnum.f64(double %x, double %y)
   ret double %r
 }
 
 define double @maxd(double %x, double %y) {
-; MIPS32R6EL-LABEL:	maxd
-; MIPS32R6EL:		# %bb.0:
-; MIPS32R6EL-NEXT:	min.d	$f0, $f14, $f14
-; MIPS32R6EL-NEXT:	min.d	$f1, $f12, $f12
-; MIPS32R6EL-NEXT:	jr	$ra
-; MIPS32R6EL-NEXT:	max.d	$f0, $f1, $f0
+; MIPS32R6EL-LABEL: maxd:
+; MIPS32R6EL:       # %bb.0:
+; MIPS32R6EL-NEXT:    jr $ra
+; MIPS32R6EL-NEXT:    max.d $f0, $f12, $f14
 ;
-; MIPS64R6EL-LABEL:	maxd
-; MIPS64R6EL:		# %bb.0:
-; MIPS64R6EL-NEXT:	min.d	$f0, $f13, $f13
-; MIPS64R6EL-NEXT:	min.d	$f1, $f12, $f12
-; MIPS64R6EL-NEXT:	jr	$ra
-; MIPS64R6EL-NEXT:	max.d	$f0, $f1, $f0
-
+; MIPS64R6EL-LABEL: maxd:
+; MIPS64R6EL:       # %bb.0:
+; MIPS64R6EL-NEXT:    jr $ra
+; MIPS64R6EL-NEXT:    max.d $f0, $f12, $f13
   %r = tail call double @llvm.maxnum.f64(double %x, double %y)
   ret double %r
 }
diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
index 42b0f69181ab7..84d7c9688d239 100644
--- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
@@ -396,57 +396,103 @@ define void @uitofp(i32 %a) {
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    addiu $sp, $sp, 8
 ;
-; MIPS64-N32-LABEL: uitofp:
-; MIPS64-N32:       # %bb.0: # %entry
-; MIPS64-N32-NEXT:    addiu $sp, $sp, -16
-; MIPS64-N32-NEXT:    .cfi_def_cfa_offset 16
-; MIPS64-N32-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
-; MIPS64-N32-NEXT:    addu $1, $1, $25
-; MIPS64-N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
-; MIPS64-N32-NEXT:    lui $2, 17200
-; MIPS64-N32-NEXT:    sw $2, 12($sp)
+; MIPS64R5-N32-LABEL: uitofp:
+; MIPS64R5-N32:       # %bb.0: # %entry
+; MIPS64R5-N32-NEXT:    addiu $sp, $sp, -16
+; MIPS64R5-N32-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-N32-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
+; MIPS64R5-N32-NEXT:    addu $1, $1, $25
+; MIPS64R5-N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
+; MIPS64R5-N32-NEXT:    lui $2, 17200
+; MIPS64R5-N32-NEXT:    sw $2, 12($sp)
 ; MIPS64R5-N32-NEXT:    sll $2, $4, 0
 ; MIPS64R5-N32-NEXT:    sw $2, 8($sp)
-; MIPSR6-N32-NEXT:    sw $4, 8($sp)
-; MIPS64-N32-NEXT:    lw $2, %got_page(.LCPI5_0)($1)
-; MIPS64-N32-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
-; MIPS64-N32-NEXT:    ldc1 $f1, 8($sp)
-; MIPS64-N32-NEXT:    sub.d $f0, $f1, $f0
-; MIPS64-N32-NEXT:    dmfc1 $2, $f0
-; MIPS64-N32-NEXT:    fill.d $w0, $2
-; MIPS64-N32-NEXT:    fexdo.w $w0, $w0, $w0
-; MIPS64-N32-NEXT:    fexdo.h $w0, $w0, $w0
-; MIPS64-N32-NEXT:    lw $1, %got_disp(h)($1)
-; MIPS64-N32-NEXT:    copy_u.h $2, $w0[0]
-; MIPS64-N32-NEXT:    sh $2, 0($1)
-; MIPS64-N32-NEXT:    jr $ra
-; MIPS64-N32-NEXT:    addiu $sp, $sp, 16
+; MIPS64R5-N32-NEXT:    lw $2, %got_page(.LCPI5_0)($1)
+; MIPS64R5-N32-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
+; MIPS64R5-N32-NEXT:    ldc1 $f1, 8($sp)
+; MIPS64R5-N32-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64R5-N32-NEXT:    dmfc1 $2, $f0
+; MIPS64R5-N32-NEXT:    fill.d $w0, $2
+; MIPS64R5-N32-NEXT:    fexdo.w $w0, $w0, $w0
+; MIPS64R5-N32-NEXT:    fexdo.h $w0, $w0, $w0
+; MIPS64R5-N32-NEXT:    lw $1, %got_disp(h)($1)
+; MIPS64R5-N32-NEXT:    copy_u.h $2, $w0[0]
+; MIPS64R5-N32-NEXT:    sh $2, 0($1)
+; MIPS64R5-N32-NEXT:    jr $ra
+; MIPS64R5-N32-NEXT:    addiu $sp, $sp, 16
 ;
-; MIPS64-N64-LABEL: uitofp:
-; MIPS64-N64:       # %bb.0: # %entry
-; MIPS64-N64-NEXT:    daddiu $sp, $sp, -16
-; MIPS64-N64-NEXT:    .cfi_def_cfa_offset 16
-; MIPS64-N64-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
-; MIPS64-N64-NEXT:    daddu $1, $1, $25
-; MIPS64-N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
-; MIPS64-N64-NEXT:    lui $2, 17200
-; MIPS64-N64-NEXT:    sw $2, 12($sp)
+; MIPS64R5-N64-LABEL: uitofp:
+; MIPS64R5-N64:       # %bb.0: # %entry
+; MIPS64R5-N64-NEXT:    daddiu $sp, $sp, -16
+; MIPS64R5-N64-NEXT:    .cfi_def_cfa_offset 16
+; MIPS64R5-N64-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
+; MIPS64R5-N64-NEXT:    daddu $1, $1, $25
+; MIPS64R5-N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
+; MIPS64R5-N64-NEXT:    lui $2, 17200
+; MIPS64R5-N64-NEXT:    sw $2, 12($sp)
 ; MIPS64R5-N64-NEXT:    sll $2, $4, 0
 ; MIPS64R5-N64-NEXT:    sw $2, 8($sp)
+; MIPS64R5-N64-NEXT:    ld $2, %got_page(.LCPI5_0)($1)
+; MIPS64R5-N64-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
+; MIPS64R5-N64-NEXT:    ldc1 $f1, 8($sp)
+; MIPS64R5-N64-NEXT:    sub.d $f0, $f1, $f0
+; MIPS64R5-N64-NEXT:    dmfc1 $2, $f0
+; MIPS64R5-N64-NEXT:    fill.d $w0, $2
+; MIPS64R5-N64-NEXT:    fexdo.w $w0, $w0, $w0
+; MIPS64R5-N64-NEXT:    fexdo.h $w0, $w0, $w0
+; MIPS64R5-N64-NEXT:    ld $1, %got_disp(h)($1)
+; MIPS64R5-N64-NEXT:    copy_u.h $2, $w0[0]
+; MIPS64R5-N64-NEXT:    sh $2, 0($1)
+; MIPS64R5-N64-NEXT:    jr $ra
+; MIPS64R5-N64-NEXT:    daddiu $sp, $sp, 16
+;
+; MIPSR6-N32-LABEL: uitofp:
+; MIPSR6-N32:       # %bb.0: # %entry
+; MIPSR6-N32-NEXT:    addiu $sp, $sp, -16
+; MIPSR6-N32-NEXT:    .cfi_def_cfa_offset 16
+; MIPSR6-N32-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
+; MIPSR6-N32-NEXT:    addu $1, $1, $25
+; MIPSR6-N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
+; MIPSR6-N32-NEXT:    lui $2, 17200
+; MIPSR6-N32-NEXT:    sw $2, 12($sp)
+; MIPSR6-N32-NEXT:    sw $4, 8($sp)
+; MIPSR6-N32-NEXT:    lw $2, %got_page(.LCPI5_0)($1)
+; MIPSR6-N32-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
+; MIPSR6-N32-NEXT:    ldc1 $f1, 8($sp)
+; MIPSR6-N32-NEXT:    sub.d $f0, $f1, $f0
+; MIPSR6-N32-NEXT:    dmfc1 $2, $f0
+; MIPSR6-N32-NEXT:    fill.d $w0, $2
+; MIPSR6-N32-NEXT:    fexdo.w $w0, $w0, $w0
+; MIPSR6-N32-NEXT:    fexdo.h $w0, $w0, $w0
+; MIPSR6-N32-NEXT:    lw $1, %got_disp(h)($1)
+; MIPSR6-N32-NEXT:    copy_u.h $2, $w0[0]
+; MIPSR6-N32-NEXT:    sh $2, 0($1)
+; MIPSR6-N32-NEXT:    jr $ra
+; MIPSR6-N32-NEXT:    addiu $sp, $sp, 16
+;
+; MIPSR6-N64-LABEL: uitofp:
+; MIPSR6-N64:       # %bb.0: # %entry
+; MIPSR6-N64-NEXT:    daddiu $sp, $sp, -16
+; MIPSR6-N64-NEXT:    .cfi_def_cfa_offset 16
+; MIPSR6-N64-NEXT:    lui $1, %hi(%neg(%gp_rel(uitofp)))
+; MIPSR6-N64-NEXT:    daddu $1, $1, $25
+; MIPSR6-N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
+; MIPSR6-N64-NEXT:    lui $2, 17200
+; MIPSR6-N64-NEXT:    sw $2, 12($sp)
 ; MIPSR6-N64-NEXT:    sw $4, 8($sp)
-; MIPS64-N64-NEXT:    ld $2, %got_page(.LCPI5_0)($1)
-; MIPS64-N64-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
-; MIPS64-N64-NEXT:    ldc1 $f1, 8($sp)
-; MIPS64-N64-NEXT:    sub.d $f0, $f1, $f0
-; MIPS64-N64-NEXT:    dmfc1 $2, $f0
-; MIPS64-N64-NEXT:    fill.d $w0, $2
-; MIPS64-N64-NEXT:    fexdo.w $w0, $w0, $w0
-; MIPS64-N64-NEXT:    fexdo.h $w0, $w0, $w0
-; MIPS64-N64-NEXT:    ld $1, %got_disp(h)($1)
-; MIPS64-N64-NEXT:    copy_u.h $2, $w0[0]
-; MIPS64-N64-NEXT:    sh $2, 0($1)
-; MIPS64-N64-NEXT:    jr $ra
-; MIPS64-N64-NEXT:    daddiu $sp, $sp, 16
+; MIPSR6-N64-NEXT:    ld $2, %got_page(.LCPI5_0)($1)
+; MIPSR6-N64-NEXT:    ldc1 $f0, %got_ofst(.LCPI5_0)($2)
+; MIPSR6-N64-NEXT:    ldc1 $f1, 8($sp)
+; MIPSR6-N64-NEXT:    sub.d $f0, $f1, $f0
+; MIPSR6-N64-NEXT:    dmfc1 $2, $f0
+; MIPSR6-N64-NEXT:    fill.d $w0, $2
+; MIPSR6-N64-NEXT:    fexdo.w $w0, $w0, $w0
+; MIPSR6-N64-NEXT:    fexdo.h $w0, $w0, $w0
+; MIPSR6-N64-NEXT:    ld $1, %got_disp(h)($1)
+; MIPSR6-N64-NEXT:    copy_u.h $2, $w0[0]
+; MIPSR6-N64-NEXT:    sh $2, 0($1)
+; MIPSR6-N64-NEXT:    jr $ra
+; MIPSR6-N64-NEXT:    daddiu $sp, $sp, 16
 entry:
 
 
@@ -2466,14 +2512,13 @@ define void @fminnum(float %b) {
 ; MIPSR6-O32-NEXT:    lui $2, %hi(_gp_disp)
 ; MIPSR6-O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
 ; MIPSR6-O32-NEXT:    addu $1, $2, $25
-; MIPSR6-O32-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-O32-NEXT:    lw $1, %got(g)($1)
 ; MIPSR6-O32-NEXT:    lh $2, 0($1)
-; MIPSR6-O32-NEXT:    fill.h $w1, $2
-; MIPSR6-O32-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-O32-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-O32-NEXT:    mtc1 $2, $f1
-; MIPSR6-O32-NEXT:    min.s $f0, $f1, $f0
+; MIPSR6-O32-NEXT:    fill.h $w0, $2
+; MIPSR6-O32-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-O32-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-O32-NEXT:    mtc1 $2, $f0
+; MIPSR6-O32-NEXT:    min.s $f0, $f0, $f12
 ; MIPSR6-O32-NEXT:    mfc1 $2, $f0
 ; MIPSR6-O32-NEXT:    fill.w $w0, $2
 ; MIPSR6-O32-NEXT:    fexdo.h $w0, $w0, $w0
@@ -2486,14 +2531,13 @@ define void @fminnum(float %b) {
 ; MIPSR6-N32-NEXT:    lui $1, %hi(%neg(%gp_rel(fminnum)))
 ; MIPSR6-N32-NEXT:    addu $1, $1, $25
 ; MIPSR6-N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(fminnum)))
-; MIPSR6-N32-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-N32-NEXT:    lw $1, %got_disp(g)($1)
 ; MIPSR6-N32-NEXT:    lh $2, 0($1)
-; MIPSR6-N32-NEXT:    fill.h $w1, $2
-; MIPSR6-N32-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-N32-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-N32-NEXT:    mtc1 $2, $f1
-; MIPSR6-N32-NEXT:    min.s $f0, $f1, $f0
+; MIPSR6-N32-NEXT:    fill.h $w0, $2
+; MIPSR6-N32-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-N32-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-N32-NEXT:    mtc1 $2, $f0
+; MIPSR6-N32-NEXT:    min.s $f0, $f0, $f12
 ; MIPSR6-N32-NEXT:    mfc1 $2, $f0
 ; MIPSR6-N32-NEXT:    fill.w $w0, $2
 ; MIPSR6-N32-NEXT:    fexdo.h $w0, $w0, $w0
@@ -2506,14 +2550,13 @@ define void @fminnum(float %b) {
 ; MIPSR6-N64-NEXT:    lui $1, %hi(%neg(%gp_rel(fminnum)))
 ; MIPSR6-N64-NEXT:    daddu $1, $1, $25
 ; MIPSR6-N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(fminnum)))
-; MIPSR6-N64-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-N64-NEXT:    ld $1, %got_disp(g)($1)
 ; MIPSR6-N64-NEXT:    lh $2, 0($1)
-; MIPSR6-N64-NEXT:    fill.h $w1, $2
-; MIPSR6-N64-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-N64-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-N64-NEXT:    mtc1 $2, $f1
-; MIPSR6-N64-NEXT:    min.s $f0, $f1, $f0
+; MIPSR6-N64-NEXT:    fill.h $w0, $2
+; MIPSR6-N64-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-N64-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-N64-NEXT:    mtc1 $2, $f0
+; MIPSR6-N64-NEXT:    min.s $f0, $f0, $f12
 ; MIPSR6-N64-NEXT:    mfc1 $2, $f0
 ; MIPSR6-N64-NEXT:    fill.w $w0, $2
 ; MIPSR6-N64-NEXT:    fexdo.h $w0, $w0, $w0
@@ -2638,14 +2681,13 @@ define void @fmaxnum(float %b) {
 ; MIPSR6-O32-NEXT:    lui $2, %hi(_gp_disp)
 ; MIPSR6-O32-NEXT:    addiu $2, $2, %lo(_gp_disp)
 ; MIPSR6-O32-NEXT:    addu $1, $2, $25
-; MIPSR6-O32-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-O32-NEXT:    lw $1, %got(g)($1)
 ; MIPSR6-O32-NEXT:    lh $2, 0($1)
-; MIPSR6-O32-NEXT:    fill.h $w1, $2
-; MIPSR6-O32-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-O32-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-O32-NEXT:    mtc1 $2, $f1
-; MIPSR6-O32-NEXT:    max.s $f0, $f1, $f0
+; MIPSR6-O32-NEXT:    fill.h $w0, $2
+; MIPSR6-O32-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-O32-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-O32-NEXT:    mtc1 $2, $f0
+; MIPSR6-O32-NEXT:    max.s $f0, $f0, $f12
 ; MIPSR6-O32-NEXT:    mfc1 $2, $f0
 ; MIPSR6-O32-NEXT:    fill.w $w0, $2
 ; MIPSR6-O32-NEXT:    fexdo.h $w0, $w0, $w0
@@ -2658,14 +2700,13 @@ define void @fmaxnum(float %b) {
 ; MIPSR6-N32-NEXT:    lui $1, %hi(%neg(%gp_rel(fmaxnum)))
 ; MIPSR6-N32-NEXT:    addu $1, $1, $25
 ; MIPSR6-N32-NEXT:    addiu $1, $1, %lo(%neg(%gp_rel(fmaxnum)))
-; MIPSR6-N32-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-N32-NEXT:    lw $1, %got_disp(g)($1)
 ; MIPSR6-N32-NEXT:    lh $2, 0($1)
-; MIPSR6-N32-NEXT:    fill.h $w1, $2
-; MIPSR6-N32-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-N32-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-N32-NEXT:    mtc1 $2, $f1
-; MIPSR6-N32-NEXT:    max.s $f0, $f1, $f0
+; MIPSR6-N32-NEXT:    fill.h $w0, $2
+; MIPSR6-N32-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-N32-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-N32-NEXT:    mtc1 $2, $f0
+; MIPSR6-N32-NEXT:    max.s $f0, $f0, $f12
 ; MIPSR6-N32-NEXT:    mfc1 $2, $f0
 ; MIPSR6-N32-NEXT:    fill.w $w0, $2
 ; MIPSR6-N32-NEXT:    fexdo.h $w0, $w0, $w0
@@ -2678,14 +2719,13 @@ define void @fmaxnum(float %b) {
 ; MIPSR6-N64-NEXT:    lui $1, %hi(%neg(%gp_rel(fmaxnum)))
 ; MIPSR6-N64-NEXT:    daddu $1, $1, $25
 ; MIPSR6-N64-NEXT:    daddiu $1, $1, %lo(%neg(%gp_rel(fmaxnum)))
-; MIPSR6-N64-NEXT:    min.s $f0, $f12, $f12
 ; MIPSR6-N64-NEXT:    ld $1, %got_disp(g)($1)
 ; MIPSR6-N64-NEXT:    lh $2, 0($1)
-; MIPSR6-N64-NEXT:    fill.h $w1, $2
-; MIPSR6-N64-NEXT:    fexupr.w $w1, $w1
-; MIPSR6-N64-NEXT:    copy_s.w $2, $w1[0]
-; MIPSR6-N64-NEXT:    mtc1 $2, $f1
-; MIPSR6-N64-NEXT:    max.s $f0, $f1, $f0
+; MIPSR6-N64-NEXT:    fill.h $w0, $2
+; MIPSR6-N64-NEXT:    fexupr.w $w0, $w0
+; MIPSR6-N64-NEXT:    copy_s.w $2, $w0[0]
+; MIPSR6-N64-NEXT:    mtc1 $2, $f0
+; MIPSR6-N64-NEXT:    max.s $f0, $f0, $f12
 ; MIPSR6-N64-NEXT:    mfc1 $2, $f0
 ; MIPSR6-N64-NEXT:    fill.w $w0, $2
 ; MIPSR6-N64-NEXT:    fexdo.h $w0, $w0, $w0

>From abd5ee9355c2d3fd0df8bb0e57a0f2cf2f9fb435 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov <mizvekov at gmail.com>
Date: Thu, 8 May 2025 20:56:48 -0300
Subject: [PATCH 06/25] Revert "[Clang] Diagnose invalid function types in
 dependent contexts (#138731)" (#139176)

This reverts commit cf9b4d1e7961214deabd99a9fc3b1d4c9e78a71f.

Causes breakages as reported here:
https://github.com/llvm/llvm-project/pull/138731#issuecomment-2864298000
---
 clang/docs/ReleaseNotes.rst                  |  3 +-
 clang/lib/Sema/SemaExpr.cpp                  | 18 -------
 clang/test/SemaTemplate/fun-template-def.cpp | 51 +-------------------
 3 files changed, 2 insertions(+), 70 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 52fa8df4d51d1..1f0dbe565db6b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -566,7 +566,7 @@ Bug Fixes in This Version
 - Fixed a bug where an attribute before a ``pragma clang attribute`` or
   ``pragma clang __debug`` would cause an assertion. Instead, this now diagnoses
   the invalid attribute location appropriately. (#GH137861)
-- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an 
+- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an
   ``#include`` directive. (#GH138094)
 - Fixed a crash during constant evaluation involving invalid lambda captures
   (#GH138832)
@@ -675,7 +675,6 @@ Bug Fixes to C++ Support
 - Fixed an assertion when trying to constant-fold various builtins when the argument
   referred to a reference to an incomplete type. (#GH129397)
 - Fixed a crash when a cast involved a parenthesized aggregate initialization in dependent context. (#GH72880)
-- Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852)
 - No longer crashes when instantiating invalid variable template specialization
   whose type depends on itself. (#GH51347), (#GH55872)
 - Improved parser recovery of invalid requirement expressions. In turn, this
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 57135adf714ce..deb8d2edfc5c9 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6550,15 +6550,6 @@ ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
   return Call;
 }
 
-// Any type that could be used to form a callable expression
-static bool MayBeFunctionType(const ASTContext &Context, QualType T) {
-  return T == Context.BoundMemberTy || T == Context.UnknownAnyTy ||
-         T == Context.BuiltinFnTy || T == Context.OverloadTy ||
-         T->isFunctionType() || T->isFunctionReferenceType() ||
-         T->isMemberFunctionPointerType() || T->isFunctionPointerType() ||
-         T->isBlockPointerType() || T->isRecordType();
-}
-
 ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
                                MultiExprArg ArgExprs, SourceLocation RParenLoc,
                                Expr *ExecConfig, bool IsExecConfig,
@@ -6612,15 +6603,6 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
             *this, dyn_cast<UnresolvedMemberExpr>(Fn->IgnoreParens()),
             Fn->getBeginLoc());
 
-        // If the type of the function itself is not dependent
-        // check that it is a reasonable as a function, as type deduction
-        // later assume the CallExpr has a sensible TYPE.
-        if (!Fn->getType()->isDependentType() &&
-            !MayBeFunctionType(Context, Fn->getType()))
-          return ExprError(
-              Diag(LParenLoc, diag::err_typecheck_call_not_function)
-              << Fn->getType() << Fn->getSourceRange());
-
         return CallExpr::Create(Context, Fn, ArgExprs, Context.DependentTy,
                                 VK_PRValue, RParenLoc, CurFPFeatureOverrides());
       }
diff --git a/clang/test/SemaTemplate/fun-template-def.cpp b/clang/test/SemaTemplate/fun-template-def.cpp
index 716296e72bc44..de77901b5b601 100644
--- a/clang/test/SemaTemplate/fun-template-def.cpp
+++ b/clang/test/SemaTemplate/fun-template-def.cpp
@@ -1,7 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
-// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s
 
 // Tests that dependent expressions are always allowed, whereas non-dependent
 // are checked as usual.
@@ -33,7 +32,7 @@ T f1(T t1, U u1, int i1, T** tpp)
   i1 = t1[u1];
   i1 *= t1;
 
-  i1(u1, t1); // expected-error {{called object type 'int' is not a function or function pointer}}
+  i1(u1, t1); // error
   u1(i1, t1);
 
   U u2 = (T)i1;
@@ -61,51 +60,3 @@ void f3() {
   f2<int*>(0);
   f2<int>(0); // expected-error {{no matching function for call to 'f2'}}
 }
-
-#if __cplusplus >= 202002L
-namespace GH138657 {
-template <auto V> // #gh138657-template-head
-class meta {};
-template<int N>
-class meta<N()> {}; // expected-error {{called object type 'int' is not a function or function point}}
-
-template<int N[1]>
-class meta<N()> {}; // expected-error {{called object type 'int *' is not a function or function point}}
-
-template<char* N>
-class meta<N()> {}; // expected-error {{called object type 'char *' is not a function or function point}}
-
-struct S {};
-template<S>
-class meta<S()> {}; // expected-error {{template argument for non-type template parameter is treated as function type 'S ()'}}
-                    // expected-note@#gh138657-template-head {{template parameter is declared here}}
-
-}
-
-namespace GH115725 {
-template<auto ...> struct X {};
-template<typename T, typename ...Ts> struct A {
-  template<Ts ...Ns, T *...Ps>
-  A(X<0(Ps)...>, Ts (*...qs)[Ns]);
-  // expected-error at -1{{called object type 'int' is not a function or function pointer}}
-
-};
-}
-
-namespace GH68852 {
-template <auto v>
-struct constexpr_value {
-  template <class... Ts>
-  constexpr constexpr_value<v(Ts::value...)> call(Ts...) {
-    //expected-error at -1 {{called object type 'int' is not a function or function pointer}}
-    return {};
-  }
-};
-
-template <auto v> constexpr static inline auto c_ = constexpr_value<v>{};
-// expected-note at -1 {{in instantiation of template}}
-auto k = c_<1>; // expected-note {{in instantiation of variable}}
-
-}
-
-#endif

>From b6922b717045d3d1bd136b96f672533a498fd5aa Mon Sep 17 00:00:00 2001
From: jimingham <jingham at apple.com>
Date: Thu, 8 May 2025 17:03:21 -0700
Subject: [PATCH 07/25] Add more logging so I can figure out why
 TestBranchIslands.py is (#139178)

TestBranchIslands.py is failing, but only on the bot.
---
 .../DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp    | 9 +++++++++
 lldb/test/API/macosx/branch-islands/TestBranchIslands.py | 9 ++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index 578ab12268ea3..6c3040ef1a1da 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -1038,6 +1038,15 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
     static RegularExpression g_branch_island_regex(g_branch_island_pattern);
 
     bool is_branch_island = g_branch_island_regex.Execute(current_name);
+    // FIXME: this is extra logging so I can figure out why this test is failing
+    // on the bot but not locally with all the same tools, etc...
+    if (thread_plan_sp && is_branch_island) {
+      if (log) {
+        StreamString s;
+        thread_plan_sp->GetDescription(&s, eDescriptionLevelVerbose);
+        LLDB_LOGF(log, "Am at a branch island, but already had plan: \n\t%s", s.GetData());
+      }
+    }
     if (!thread_plan_sp && is_branch_island) {
       thread_plan_sp = std::make_shared<ThreadPlanStepInstruction>(
           thread,
diff --git a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
index c79840b400432..a8dd1886d5568 100644
--- a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
+++ b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
@@ -2,7 +2,7 @@
 Make sure that we can step in across an arm64 branch island
 """
 
-
+import os
 import lldb
 import lldbsuite.test.lldbutil as lldbutil
 from lldbsuite.test.lldbtest import *
@@ -32,6 +32,9 @@ def do_test(self):
         trace_before = lldbutil.print_stacktrace(thread, True)
         func_before = thread.frames[0].function
 
+        log_file_path = os.path.join(self.getBuildDir(), "step-log.txt")
+        self.runCmd(f"log enable -f {log_file_path} lldb step")
+        
         thread.StepInto()
         stop_frame = thread.frames[0]
         # This is failing on the bot, but I can't reproduce the failure
@@ -59,6 +62,10 @@ def do_test(self):
             print(
                 f"\nStop disassembly:\n {lldbutil.disassemble(target, stop_frame.function)}"
             )
+            with open(log_file_path, "r") as f:
+                data = f.read()
+                print("Step Log:")
+                print(data)
 
         self.assertIn("foo", stop_frame.name, "Stepped into foo")
         var = stop_frame.FindVariable("a_variable_in_foo")

>From 2299bada9f163df64083434754ebc4b353f562d5 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Thu, 8 May 2025 17:20:18 -0700
Subject: [PATCH 08/25] Revert "[llvm-objdump] Add support for HIP offload
 bundles (#114834)"

This reverts commit 06d6623bc304d5fc2fe11b80b62b4c5d10f9eaa1.

Buildbot failure:
https://lab.llvm.org/buildbot/#/builders/145/builds/6871/steps/5/logs/stdio
---
 llvm/docs/CommandGuide/llvm-objdump.rst       |   2 +-
 llvm/include/llvm/Object/OffloadBundle.h      | 211 --------
 llvm/lib/Object/CMakeLists.txt                |   1 -
 llvm/lib/Object/OffloadBundle.cpp             | 478 ------------------
 .../tools/llvm-objdump/Offloading/fatbin.test |  60 ---
 llvm/tools/llvm-objdump/OffloadDump.cpp       |  46 +-
 llvm/tools/llvm-objdump/OffloadDump.h         |   6 +-
 llvm/tools/llvm-objdump/llvm-objdump.cpp      |   3 +-
 llvm/unittests/Object/CMakeLists.txt          |   1 -
 .../unittests/Object/OffloadingBundleTest.cpp |  89 ----
 10 files changed, 4 insertions(+), 893 deletions(-)
 delete mode 100644 llvm/include/llvm/Object/OffloadBundle.h
 delete mode 100644 llvm/lib/Object/OffloadBundle.cpp
 delete mode 100644 llvm/test/tools/llvm-objdump/Offloading/fatbin.test
 delete mode 100644 llvm/unittests/Object/OffloadingBundleTest.cpp

diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst
index 5e5eaccecd2b7..ab9f583e96ec6 100644
--- a/llvm/docs/CommandGuide/llvm-objdump.rst
+++ b/llvm/docs/CommandGuide/llvm-objdump.rst
@@ -217,7 +217,7 @@ OPTIONS
 
 .. option:: --offloading
 
-  Display the content of the LLVM offloading sections and HIP offload bundles.
+  Display the content of the LLVM offloading section.
 
 .. option:: --prefix=<prefix>
 
diff --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h
deleted file mode 100644
index 7fc0ab141966a..0000000000000
--- a/llvm/include/llvm/Object/OffloadBundle.h
+++ /dev/null
@@ -1,211 +0,0 @@
-//===- OffloadBundle.h - Utilities for offload bundles---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===-------------------------------------------------------------------------===//
-//
-// This file contains the binary format used for budingling device metadata with
-// an associated device image. The data can then be stored inside a host object
-// file to create a fat binary and read by the linker. This is intended to be a
-// thin wrapper around the image itself. If this format becomes sufficiently
-// complex it should be moved to a standard binary format like msgpack or ELF.
-//
-//===-------------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECT_OFFLOADBUNDLE_H
-#define LLVM_OBJECT_OFFLOADBUNDLE_H
-
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <memory>
-
-namespace llvm {
-
-namespace object {
-
-class CompressedOffloadBundle {
-private:
-  static inline const size_t MagicSize = 4;
-  static inline const size_t VersionFieldSize = sizeof(uint16_t);
-  static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t HashFieldSize = sizeof(uint64_t);
-  static inline const size_t V1HeaderSize =
-      MagicSize + VersionFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
-  static inline const size_t V2HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
-  static inline const llvm::StringRef MagicNumber = "CCOB";
-  static inline const uint16_t Version = 2;
-
-public:
-  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
-           bool Verbose = false);
-  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
-};
-
-/// Bundle entry in binary clang-offload-bundler format.
-struct OffloadBundleEntry {
-  uint64_t Offset = 0u;
-  uint64_t Size = 0u;
-  uint64_t IDLength = 0u;
-  StringRef ID;
-  OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
-      : Offset(O), Size(S), IDLength(I), ID(T) {}
-  void dumpInfo(raw_ostream &OS) {
-    OS << "Offset = " << Offset << ", Size = " << Size
-       << ", ID Length = " << IDLength << ", ID = " << ID;
-  }
-  void dumpURI(raw_ostream &OS, StringRef FilePath) {
-    OS << ID.data() << "\tfile://" << FilePath << "#offset=" << Offset
-       << "&size=" << Size << "\n";
-  }
-};
-
-/// Fat binary embedded in object files in clang-offload-bundler format
-class OffloadBundleFatBin {
-
-  uint64_t Size = 0u;
-  StringRef FileName;
-  uint64_t NumberOfEntries;
-  SmallVector<OffloadBundleEntry> Entries;
-
-public:
-  SmallVector<OffloadBundleEntry> getEntries() { return Entries; }
-  uint64_t getSize() const { return Size; }
-  StringRef getFileName() const { return FileName; }
-  uint64_t getNumEntries() const { return NumberOfEntries; }
-
-  static Expected<std::unique_ptr<OffloadBundleFatBin>>
-  create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName);
-  Error extractBundle(const ObjectFile &Source);
-
-  Error dumpEntryToCodeObject();
-
-  Error readEntries(StringRef Section, uint64_t SectionOffset);
-  void dumpEntries() {
-    for (OffloadBundleEntry &Entry : Entries)
-      Entry.dumpInfo(outs());
-  }
-
-  void printEntriesAsURI() {
-    for (OffloadBundleEntry &Entry : Entries)
-      Entry.dumpURI(outs(), FileName);
-  }
-
-  OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
-      : FileName(File), NumberOfEntries(0),
-        Entries(SmallVector<OffloadBundleEntry>()) {}
-
-  SmallVector<OffloadBundleEntry> entryIDContains(StringRef Str) {
-
-    SmallVector<OffloadBundleEntry> Found = SmallVector<OffloadBundleEntry>();
-    llvm::transform(Entries, std::back_inserter(Found), [Str](auto &X) {
-      if (X.ID.contains(Str))
-        return X;
-    });
-    return Found;
-  }
-};
-
-enum UriTypeT { FILE_URI, MEMORY_URI };
-
-struct OffloadBundleURI {
-  int64_t Offset = 0;
-  int64_t Size = 0;
-  uint64_t ProcessID = 0;
-  StringRef FileName;
-  UriTypeT URIType;
-
-  // Constructors
-  // TODO: add a Copy ctor ?
-  OffloadBundleURI(StringRef File, int64_t Off, int64_t Size)
-      : Offset(Off), Size(Size), ProcessID(0), FileName(File),
-        URIType(FILE_URI) {}
-
-public:
-  static Expected<std::unique_ptr<OffloadBundleURI>>
-  createOffloadBundleURI(StringRef Str, UriTypeT Type) {
-    switch (Type) {
-    case FILE_URI:
-      return createFileURI(Str);
-      break;
-    case MEMORY_URI:
-      return createMemoryURI(Str);
-      break;
-    default:
-      return createStringError(object_error::parse_failed,
-                               "Unrecognized URI type");
-    }
-  }
-
-  static Expected<std::unique_ptr<OffloadBundleURI>>
-  createFileURI(StringRef Str) {
-    int64_t O = 0;
-    int64_t S = 0;
-
-    if (!Str.consume_front("file://"))
-      return createStringError(object_error::parse_failed,
-                               "Reading type of URI");
-
-    StringRef FilePathname =
-        Str.take_until([](char C) { return (C == '#') || (C == '?'); });
-    Str = Str.drop_front(FilePathname.size());
-
-    if (!Str.consume_front("#offset="))
-      return createStringError(object_error::parse_failed,
-                               "Reading 'offset' in URI");
-
-    StringRef OffsetStr = Str.take_until([](char C) { return C == '&'; });
-    OffsetStr.getAsInteger(10, O);
-    Str = Str.drop_front(OffsetStr.size());
-
-    if (Str.consume_front("&size="))
-      return createStringError(object_error::parse_failed,
-                               "Reading 'size' in URI");
-
-    Str.getAsInteger(10, S);
-    std::unique_ptr<OffloadBundleURI> OffloadingURI(
-        new OffloadBundleURI(FilePathname, O, S));
-    return OffloadingURI;
-  }
-
-  static Expected<std::unique_ptr<OffloadBundleURI>>
-  createMemoryURI(StringRef Str) {
-    // TODO: add parseMemoryURI type
-    return createStringError(object_error::parse_failed,
-                             "Memory Type URI is not currently supported.");
-  }
-
-  StringRef getFileName() const { return FileName; }
-};
-
-/// Extracts fat binary in binary clang-offload-bundler format from object \p
-/// Obj and return it in \p Bundles
-Error extractOffloadBundleFatBinary(
-    const ObjectFile &Obj, SmallVectorImpl<OffloadBundleFatBin> &Bundles);
-
-/// Extract code object memory from the given \p Source object file at \p Offset
-/// and of \p Size, and copy into \p OutputFileName.
-Error extractCodeObject(const ObjectFile &Source, int64_t Offset, int64_t Size,
-                        StringRef OutputFileName);
-
-/// Extracts an Offload Bundle Entry given by URI
-Error extractOffloadBundleByURI(StringRef URIstr);
-
-} // namespace object
-
-} // namespace llvm
-#endif
diff --git a/llvm/lib/Object/CMakeLists.txt b/llvm/lib/Object/CMakeLists.txt
index 870169a83174f..bfb420e57a7f4 100644
--- a/llvm/lib/Object/CMakeLists.txt
+++ b/llvm/lib/Object/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_component_library(LLVMObject
   Object.cpp
   ObjectFile.cpp
   OffloadBinary.cpp
-  OffloadBundle.cpp
   RecordStreamer.cpp
   RelocationResolver.cpp
   SymbolicFile.cpp
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
deleted file mode 100644
index b3d85be811a5a..0000000000000
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ /dev/null
@@ -1,478 +0,0 @@
-//===- OffloadBundle.cpp - Utilities for offload bundles---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------===//
-
-#include "llvm/Object/OffloadBundle.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/BinaryFormat/Magic.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/Binary.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/Error.h"
-#include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Alignment.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/Timer.h"
-
-using namespace llvm;
-using namespace llvm::object;
-
-static llvm::TimerGroup
-    OffloadBundlerTimerGroup("Offload Bundler Timer Group",
-                             "Timer group for offload bundler");
-
-// Extract an Offload bundle (usually a Offload Bundle) from a fat_bin
-// section
-Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
-                           StringRef FileName,
-                           SmallVectorImpl<OffloadBundleFatBin> &Bundles) {
-
-  uint64_t Offset = 0;
-  int64_t NextbundleStart = 0;
-
-  // There could be multiple offloading bundles stored at this section.
-  while (NextbundleStart >= 0) {
-
-    std::unique_ptr<MemoryBuffer> Buffer =
-        MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "",
-                                   /*RequiresNullTerminator=*/false);
-
-    // Create the FatBinBindle object. This will also create the Bundle Entry
-    // list info.
-    auto FatBundleOrErr =
-        OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName);
-    if (!FatBundleOrErr)
-      return FatBundleOrErr.takeError();
-
-    // Add current Bundle to list.
-    Bundles.emplace_back(std::move(**FatBundleOrErr));
-
-    // Find the next bundle by searching for the magic string
-    StringRef Str = Buffer->getBuffer();
-    NextbundleStart =
-        (int64_t)Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24);
-
-    if (NextbundleStart >= 0)
-      Offset += NextbundleStart;
-  }
-
-  return Error::success();
-}
-
-Error OffloadBundleFatBin::readEntries(StringRef Buffer,
-                                       uint64_t SectionOffset) {
-  uint64_t NumOfEntries = 0;
-
-  BinaryStreamReader Reader(Buffer, llvm::endianness::little);
-
-  // Read the Magic String first.
-  StringRef Magic;
-  if (auto EC = Reader.readFixedString(Magic, 24))
-    return errorCodeToError(object_error::parse_failed);
-
-  // Read the number of Code Objects (Entries) in the current Bundle.
-  if (auto EC = Reader.readInteger(NumOfEntries))
-    return errorCodeToError(object_error::parse_failed);
-
-  NumberOfEntries = NumOfEntries;
-
-  // For each Bundle Entry (code object)
-  for (uint64_t I = 0; I < NumOfEntries; I++) {
-    uint64_t EntrySize;
-    uint64_t EntryOffset;
-    uint64_t EntryIDSize;
-    StringRef EntryID;
-
-    if (auto EC = Reader.readInteger(EntryOffset))
-      return errorCodeToError(object_error::parse_failed);
-
-    if (auto EC = Reader.readInteger(EntrySize))
-      return errorCodeToError(object_error::parse_failed);
-
-    if (auto EC = Reader.readInteger(EntryIDSize))
-      return errorCodeToError(object_error::parse_failed);
-
-    if (auto EC = Reader.readFixedString(EntryID, EntryIDSize))
-      return errorCodeToError(object_error::parse_failed);
-
-    // Create a Bundle Entry object:
-    //    auto Entry = new OffloadBundleEntry(EntryOffset + SectionOffset,
-    //    EntrySize,
-    //                                        EntryIDSize, EntryID);
-    auto Entry = std::make_unique<OffloadBundleEntry>(
-        EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID);
-
-    Entries.push_back(*Entry);
-  }
-
-  return Error::success();
-}
-
-Expected<std::unique_ptr<OffloadBundleFatBin>>
-OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
-                            StringRef FileName) {
-  if (Buf.getBufferSize() < 24)
-    return errorCodeToError(object_error::parse_failed);
-
-  // Check for magic bytes.
-  if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle)
-    return errorCodeToError(object_error::parse_failed);
-
-  OffloadBundleFatBin *TheBundle = new OffloadBundleFatBin(Buf, FileName);
-
-  // Read the Bundle Entries
-  Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
-  if (Err)
-    return errorCodeToError(object_error::parse_failed);
-
-  return std::unique_ptr<OffloadBundleFatBin>(TheBundle);
-}
-
-Error OffloadBundleFatBin::extractBundle(const ObjectFile &Source) {
-  // This will extract all entries in the Bundle
-  for (OffloadBundleEntry &Entry : Entries) {
-
-    if (Entry.Size == 0)
-      continue;
-
-    // create output file name. Which should be
-    // <fileName>-offset<Offset>-size<Size>.co"
-    std::string Str = getFileName().str() + "-offset" + itostr(Entry.Offset) +
-                      "-size" + itostr(Entry.Size) + ".co";
-    if (Error Err = object::extractCodeObject(Source, Entry.Offset, Entry.Size,
-                                              StringRef(Str)))
-      return Err;
-  }
-
-  return Error::success();
-}
-
-Error object::extractOffloadBundleFatBinary(
-    const ObjectFile &Obj, SmallVectorImpl<OffloadBundleFatBin> &Bundles) {
-  assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type");
-
-  // Iterate through Sections until we find an offload_bundle section.
-  for (SectionRef Sec : Obj.sections()) {
-    Expected<StringRef> Buffer = Sec.getContents();
-    if (!Buffer)
-      return Buffer.takeError();
-
-    // If it does not start with the reserved suffix, just skip this section.
-    if ((llvm::identify_magic(*Buffer) == llvm::file_magic::offload_bundle) ||
-        (llvm::identify_magic(*Buffer) ==
-         llvm::file_magic::offload_bundle_compressed)) {
-
-      uint64_t SectionOffset = 0;
-      if (Obj.isELF()) {
-        SectionOffset = ELFSectionRef(Sec).getOffset();
-      } else if (Obj.isCOFF()) {
-        if (const COFFObjectFile *COFFObj = dyn_cast<COFFObjectFile>(&Obj))
-          const coff_section *CoffSection = COFFObj->getCOFFSection(Sec);
-      }
-
-      MemoryBufferRef Contents(*Buffer, Obj.getFileName());
-
-      if (llvm::identify_magic(*Buffer) ==
-          llvm::file_magic::offload_bundle_compressed) {
-        // Decompress the input if necessary.
-        Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
-            CompressedOffloadBundle::decompress(Contents, false);
-
-        if (!DecompressedBufferOrErr)
-          return createStringError(
-              inconvertibleErrorCode(),
-              "Failed to decompress input: " +
-                  llvm::toString(DecompressedBufferOrErr.takeError()));
-
-        MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
-        if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      } else {
-        if (Error Err = extractOffloadBundle(Contents, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      }
-    }
-  }
-  return Error::success();
-}
-
-Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset,
-                                int64_t Size, StringRef OutputFileName) {
-  Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
-      FileOutputBuffer::create(OutputFileName, Size);
-
-  if (!BufferOrErr)
-    return BufferOrErr.takeError();
-
-  Expected<MemoryBufferRef> InputBuffOrErr = Source.getMemoryBufferRef();
-  if (Error Err = InputBuffOrErr.takeError())
-    return Err;
-
-  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
-  std::copy(InputBuffOrErr->getBufferStart() + Offset,
-            InputBuffOrErr->getBufferStart() + Offset + Size,
-            Buf->getBufferStart());
-  if (Error E = Buf->commit())
-    return E;
-
-  return Error::success();
-}
-
-// given a file name, offset, and size, extract data into a code object file,
-// into file <SourceFile>-offset<Offset>-size<Size>.co
-Error object::extractOffloadBundleByURI(StringRef URIstr) {
-  // create a URI object
-  Expected<std::unique_ptr<OffloadBundleURI>> UriOrErr(
-      OffloadBundleURI::createOffloadBundleURI(URIstr, FILE_URI));
-  if (!UriOrErr)
-    return UriOrErr.takeError();
-
-  OffloadBundleURI &Uri = **UriOrErr;
-  std::string OutputFile = Uri.FileName.str();
-  OutputFile +=
-      "-offset" + itostr(Uri.Offset) + "-size" + itostr(Uri.Size) + ".co";
-
-  // Create an ObjectFile object from uri.file_uri
-  auto ObjOrErr = ObjectFile::createObjectFile(Uri.FileName);
-  if (!ObjOrErr)
-    return ObjOrErr.takeError();
-
-  auto Obj = ObjOrErr->getBinary();
-  if (Error Err =
-          object::extractCodeObject(*Obj, Uri.Offset, Uri.Size, OutputFile))
-    return Err;
-
-  return Error::success();
-}
-
-// Utility function to format numbers with commas
-static std::string formatWithCommas(unsigned long long Value) {
-  std::string Num = std::to_string(Value);
-  int InsertPosition = Num.length() - 3;
-  while (InsertPosition > 0) {
-    Num.insert(InsertPosition, ",");
-    InsertPosition -= 3;
-  }
-  return Num;
-}
-
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
-                                    bool Verbose) {
-  StringRef Blob = Input.getBuffer();
-
-  if (Blob.size() < V1HeaderSize)
-    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
-
-  if (llvm::identify_magic(Blob) !=
-      llvm::file_magic::offload_bundle_compressed) {
-    if (Verbose)
-      llvm::errs() << "Uncompressed bundle.\n";
-    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
-  }
-
-  size_t CurrentOffset = MagicSize;
-
-  uint16_t ThisVersion;
-  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += VersionFieldSize;
-
-  uint16_t CompressionMethod;
-  memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += MethodFieldSize;
-
-  uint32_t TotalFileSize;
-  if (ThisVersion >= 2) {
-    if (Blob.size() < V2HeaderSize)
-      return createStringError(inconvertibleErrorCode(),
-                               "Compressed bundle header size too small");
-    memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
-    CurrentOffset += FileSizeFieldSize;
-  }
-
-  uint32_t UncompressedSize;
-  memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
-  CurrentOffset += UncompressedSizeFieldSize;
-
-  uint64_t StoredHash;
-  memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
-  CurrentOffset += HashFieldSize;
-
-  llvm::compression::Format CompressionFormat;
-  if (CompressionMethod ==
-      static_cast<uint16_t>(llvm::compression::Format::Zlib))
-    CompressionFormat = llvm::compression::Format::Zlib;
-  else if (CompressionMethod ==
-           static_cast<uint16_t>(llvm::compression::Format::Zstd))
-    CompressionFormat = llvm::compression::Format::Zstd;
-  else
-    return createStringError(inconvertibleErrorCode(),
-                             "Unknown compressing method");
-
-  llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
-                              OffloadBundlerTimerGroup);
-  if (Verbose)
-    DecompressTimer.startTimer();
-
-  SmallVector<uint8_t, 0> DecompressedData;
-  StringRef CompressedData = Blob.substr(CurrentOffset);
-  if (llvm::Error DecompressionError = llvm::compression::decompress(
-          CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
-          DecompressedData, UncompressedSize))
-    return createStringError(inconvertibleErrorCode(),
-                             "Could not decompress embedded file contents: " +
-                                 llvm::toString(std::move(DecompressionError)));
-
-  if (Verbose) {
-    DecompressTimer.stopTimer();
-
-    double DecompressionTimeSeconds =
-        DecompressTimer.getTotalTime().getWallTime();
-
-    // Recalculate MD5 hash for integrity check.
-    llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
-                                "Hash recalculation time",
-                                OffloadBundlerTimerGroup);
-    HashRecalcTimer.startTimer();
-    llvm::MD5 Hash;
-    llvm::MD5::MD5Result Result;
-    Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
-                                        DecompressedData.size()));
-    Hash.final(Result);
-    uint64_t RecalculatedHash = Result.low();
-    HashRecalcTimer.stopTimer();
-    bool HashMatch = (StoredHash == RecalculatedHash);
-
-    double CompressionRate =
-        static_cast<double>(UncompressedSize) / CompressedData.size();
-    double DecompressionSpeedMBs =
-        (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
-
-    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
-    if (ThisVersion >= 2)
-      llvm::errs() << "Total file size (from header): "
-                   << formatWithCommas(TotalFileSize) << " bytes\n";
-    llvm::errs() << "Decompression method: "
-                 << (CompressionFormat == llvm::compression::Format::Zlib
-                         ? "zlib"
-                         : "zstd")
-                 << "\n"
-                 << "Size before decompression: "
-                 << formatWithCommas(CompressedData.size()) << " bytes\n"
-                 << "Size after decompression: "
-                 << formatWithCommas(UncompressedSize) << " bytes\n"
-                 << "Compression rate: "
-                 << llvm::format("%.2lf", CompressionRate) << "\n"
-                 << "Compression ratio: "
-                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
-                 << "Decompression speed: "
-                 << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
-                 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
-                 << "Recalculated hash: "
-                 << llvm::format_hex(RecalculatedHash, 16) << "\n"
-                 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
-  }
-
-  return llvm::MemoryBuffer::getMemBufferCopy(
-      llvm::toStringRef(DecompressedData));
-}
-
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::compress(llvm::compression::Params P,
-                                  const llvm::MemoryBuffer &Input,
-                                  bool Verbose) {
-  if (!llvm::compression::zstd::isAvailable() &&
-      !llvm::compression::zlib::isAvailable())
-    return createStringError(llvm::inconvertibleErrorCode(),
-                             "Compression not supported");
-
-  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
-                        OffloadBundlerTimerGroup);
-  if (Verbose)
-    HashTimer.startTimer();
-  llvm::MD5 Hash;
-  llvm::MD5::MD5Result Result;
-  Hash.update(Input.getBuffer());
-  Hash.final(Result);
-  uint64_t TruncatedHash = Result.low();
-  if (Verbose)
-    HashTimer.stopTimer();
-
-  SmallVector<uint8_t, 0> CompressedBuffer;
-  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
-      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
-      Input.getBuffer().size());
-
-  llvm::Timer CompressTimer("Compression Timer", "Compression time",
-                            OffloadBundlerTimerGroup);
-  if (Verbose)
-    CompressTimer.startTimer();
-  llvm::compression::compress(P, BufferUint8, CompressedBuffer);
-  if (Verbose)
-    CompressTimer.stopTimer();
-
-  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
-  uint32_t UncompressedSize = Input.getBuffer().size();
-  uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
-                           sizeof(Version) + sizeof(CompressionMethod) +
-                           sizeof(UncompressedSize) + sizeof(TruncatedHash) +
-                           CompressedBuffer.size();
-
-  SmallVector<char, 0> FinalBuffer;
-  llvm::raw_svector_ostream OS(FinalBuffer);
-  OS << MagicNumber;
-  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
-  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
-           sizeof(CompressionMethod));
-  OS.write(reinterpret_cast<const char *>(&TotalFileSize),
-           sizeof(TotalFileSize));
-  OS.write(reinterpret_cast<const char *>(&UncompressedSize),
-           sizeof(UncompressedSize));
-  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
-           sizeof(TruncatedHash));
-  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
-           CompressedBuffer.size());
-
-  if (Verbose) {
-    auto MethodUsed =
-        P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
-    double CompressionRate =
-        static_cast<double>(UncompressedSize) / CompressedBuffer.size();
-    double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
-    double CompressionSpeedMBs =
-        (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
-
-    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
-                 << "Total file size (including headers): "
-                 << formatWithCommas(TotalFileSize) << " bytes\n"
-                 << "Compression method used: " << MethodUsed << "\n"
-                 << "Compression level: " << P.level << "\n"
-                 << "Binary size before compression: "
-                 << formatWithCommas(UncompressedSize) << " bytes\n"
-                 << "Binary size after compression: "
-                 << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
-                 << "Compression rate: "
-                 << llvm::format("%.2lf", CompressionRate) << "\n"
-                 << "Compression ratio: "
-                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
-                 << "Compression speed: "
-                 << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
-                 << "Truncated MD5 hash: "
-                 << llvm::format_hex(TruncatedHash, 16) << "\n";
-  }
-  return llvm::MemoryBuffer::getMemBufferCopy(
-      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
-}
diff --git a/llvm/test/tools/llvm-objdump/Offloading/fatbin.test b/llvm/test/tools/llvm-objdump/Offloading/fatbin.test
deleted file mode 100644
index c93e5692f583f..0000000000000
--- a/llvm/test/tools/llvm-objdump/Offloading/fatbin.test
+++ /dev/null
@@ -1,60 +0,0 @@
-## Test that --offloading with a fatbin works correctly
-
-# REQUIRES: target={{.*-linux.*}}
-# RUN: yaml2obj %s -o %t.elf
-# RUN: llvm-objdump --offloading %t.elf 
-# RUN: llvm-objdump -d %t.elf:0.hipv4-amdgcn-amd-amdhsa--gfx908 | FileCheck %s 
-
-
-# CHECK:         s_load_dword s7, s[4:5], 0x24
-# CHECK-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
-# CHECK-NEXT:    v_mov_b32_e32 v1, 0
-# CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-# CHECK-NEXT:    s_and_b32 s4, s7, 0xffff
-# CHECK-NEXT:    s_mul_i32 s6, s6, s4
-# CHECK-NEXT:    v_add_u32_e32 v0, s6, v0
-# CHECK-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
-# CHECK-NEXT:    v_mov_b32_e32 v3, s3
-# CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, s2, v0
-# CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
-# CHECK-NEXT:    global_load_dword v2, v[2:3], off
-# CHECK-NEXT:    v_mov_b32_e32 v3, s1
-# CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
-# CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
-# CHECK-NEXT:    global_load_dword v3, v[0:1], off
-# CHECK-NEXT:    s_waitcnt vmcnt(0)
-# CHECK-NEXT:    v_add_u32_e32 v2, v3, v2
-# CHECK-NEXT:    global_store_dword v[0:1], v2, off
-# CHECK-NEXT:    s_endpgm
-
---- !ELF
-FileHeader:
-  Class:           ELFCLASS64
-  Data:            ELFDATA2LSB
-  Type:            ET_EXEC
-  Machine:         EM_X86_64
-  Entry:           0x2041B0
-ProgramHeaders:
-  - Type:            PT_PHDR
-    Flags:           [ PF_R ]
-    VAddr:           0x200040
-    Align:           0x8
-    Offset:          0x40
-  - Type:            PT_GNU_STACK
-    Flags:           [ PF_W, PF_R ]
-    Align:           0x0
-    Offset:          0x0
-Sections:
-  - Name:            .hip_fatbin
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x201000
-    AddressAlign:    0x1000
-    Content:         5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B00000000000000686F73742D7838365F36342D756E6B6E6F776E2D6C696E75782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D6766783930380000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E
6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7983A72E6F66667365742EA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F
6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E20323431373620663935303039613166393032313232343865313036333964653837653635636163616338643961372900000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F74
65002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E72656C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A0000000000007800000000
0000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000
-  - Name:            .hipFatBinSegment
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x202FD0
-    AddressAlign:    0x8
-    Content:         '465049480100000000102000000000000000000000000000'
-...
diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp
index 654709a4ddff5..4ac6b99e79bbb 100644
--- a/llvm/tools/llvm-objdump/OffloadDump.cpp
+++ b/llvm/tools/llvm-objdump/OffloadDump.cpp
@@ -14,16 +14,12 @@
 #include "OffloadDump.h"
 #include "llvm-objdump.h"
 #include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/OffloadBinary.h"
-#include "llvm/Object/OffloadBundle.h"
 #include "llvm/Support/Alignment.h"
 
 using namespace llvm;
 using namespace llvm::object;
 using namespace llvm::objdump;
 
-void disassembleObject(llvm::object::ObjectFile *, bool InlineRelocs);
-
 /// Get the printable name of the image kind.
 static StringRef getImageName(const OffloadBinary &OB) {
   switch (OB.getImageKind()) {
@@ -52,7 +48,7 @@ static void printBinary(const OffloadBinary &OB, uint64_t Index) {
 }
 
 /// Print the embedded offloading contents of an ObjectFile \p O.
-void llvm::dumpOffloadBinary(const ObjectFile &O, StringRef ArchName) {
+void llvm::dumpOffloadBinary(const ObjectFile &O) {
   if (!O.isELF() && !O.isCOFF()) {
     reportWarning(
         "--offloading is currently only supported for COFF and ELF targets",
@@ -68,46 +64,6 @@ void llvm::dumpOffloadBinary(const ObjectFile &O, StringRef ArchName) {
   // Print out all the binaries that are contained in this buffer.
   for (uint64_t I = 0, E = Binaries.size(); I != E; ++I)
     printBinary(*Binaries[I].getBinary(), I);
-
-  dumpOffloadBundleFatBinary(O, ArchName);
-}
-
-// Given an Object file, collect all Bundles of FatBin Binaries
-// and dump them into Code Object files
-// if -arch=-name is specified, only dump the Entries that match the target arch
-void llvm::dumpOffloadBundleFatBinary(const ObjectFile &O, StringRef ArchName) {
-  if (!O.isELF() && !O.isCOFF()) {
-    reportWarning(
-        "--offloading is currently only supported for COFF and ELF targets",
-        O.getFileName());
-    return;
-  }
-
-  SmallVector<llvm::object::OffloadBundleFatBin> FoundBundles;
-  SmallVector<llvm::object::OffloadBundleEntry> FoundEntries;
-
-  if (Error Err = llvm::object::extractOffloadBundleFatBinary(O, FoundBundles))
-    reportError(O.getFileName(), "while extracting offload FatBin bundles: " +
-                                     toString(std::move(Err)));
-
-  for (const auto &[BundleNum, Bundle] : llvm::enumerate(FoundBundles)) {
-    if (!ArchName.empty())
-      FoundEntries = Bundle.entryIDContains(ArchName);
-    else
-      FoundEntries = Bundle.getEntries();
-
-    for (OffloadBundleEntry &Entry : FoundEntries) {
-      // create file name for this object file:  <source-filename>:<Bundle
-      // Number>.<EntryID>
-      std::string str = Bundle.getFileName().str() + ":" + itostr(BundleNum) +
-                        "." + Entry.ID.str();
-      if (Error Err = object::extractCodeObject(O, Entry.Offset, Entry.Size,
-                                                StringRef(str)))
-        reportError(O.getFileName(),
-                    "while extracting offload Bundle Entries: " +
-                        toString(std::move(Err)));
-    }
-  }
 }
 
 /// Print the contents of an offload binary file \p OB. This may contain
diff --git a/llvm/tools/llvm-objdump/OffloadDump.h b/llvm/tools/llvm-objdump/OffloadDump.h
index 229d479ae357b..75f188e9d5065 100644
--- a/llvm/tools/llvm-objdump/OffloadDump.h
+++ b/llvm/tools/llvm-objdump/OffloadDump.h
@@ -11,16 +11,12 @@
 
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/OffloadBinary.h"
-#include "llvm/Object/OffloadBundle.h"
 
 namespace llvm {
 
 void dumpOffloadSections(const object::OffloadBinary &OB);
-void dumpOffloadBinary(const object::ObjectFile &O, StringRef ArchName);
+void dumpOffloadBinary(const object::ObjectFile &O);
 
-/// Dump fat binary in binary clang-offload-bundler format
-void dumpOffloadBundleFatBinary(const object::ObjectFile &O,
-                                StringRef ArchName);
 } // namespace llvm
 
 #endif
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index dce07935fafd0..5c84fd5380c4d 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -56,7 +56,6 @@
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/OffloadBinary.h"
-#include "llvm/Object/OffloadBundle.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
@@ -3368,7 +3367,7 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
   if (FaultMapSection)
     printFaultMaps(O);
   if (Offloading)
-    dumpOffloadBinary(*O, StringRef(ArchName));
+    dumpOffloadBinary(*O);
 }
 
 static void dumpObject(const COFFImportFile *I, const Archive *A,
diff --git a/llvm/unittests/Object/CMakeLists.txt b/llvm/unittests/Object/CMakeLists.txt
index 1343352d1dc69..81bc4a5577e68 100644
--- a/llvm/unittests/Object/CMakeLists.txt
+++ b/llvm/unittests/Object/CMakeLists.txt
@@ -16,7 +16,6 @@ add_llvm_unittest(ObjectTests
   MinidumpTest.cpp
   ObjectFileTest.cpp
   OffloadingTest.cpp
-  OffloadingBundleTest.cpp
   SymbolSizeTest.cpp
   SymbolicFileTest.cpp
   XCOFFObjectFileTest.cpp
diff --git a/llvm/unittests/Object/OffloadingBundleTest.cpp b/llvm/unittests/Object/OffloadingBundleTest.cpp
deleted file mode 100644
index 923e7fbf48da4..0000000000000
--- a/llvm/unittests/Object/OffloadingBundleTest.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-// Skip running on Windows.
-#if !defined(_WIN32)
-
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/OffloadBinary.h"
-#include "llvm/Object/OffloadBundle.h"
-#include "llvm/ObjectYAML/yaml2obj.h"
-#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/YAMLTraits.h"
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-#include <random>
-
-using namespace llvm;
-using namespace llvm::object;
-
-StringRef simpleAdd = R"(
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:            ET_EXEC
-  Machine:         EM_X86_64
-  Entry:           0x2041B0
-Sections:
-  - Name:            .hip_fatbin
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x201000
-    AddressAlign:    0x1000
-    Content:         5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B00000000000000686F73742D7838365F36342D756E6B6E6F776E2D6C696E75782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D6766783930380000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E
6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7983A72E6F66667365742EA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F
6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E20323431373620663935303039613166393032313232343865313036333964653837653635636163616338643961372900000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F74
65002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E72656C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A0000000000007800000000
0000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000
-  - Name:            .hipFatBinSegment
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_ALLOC ]
-    Address:         0x202FD0
-    AddressAlign:    0x8
-    Content:         '465049480100000000102000000000000000000000000000'
-)";
-
-// ELF Object creation
-static Expected<std::unique_ptr<ObjectFile>>
-toBinary(SmallVectorImpl<char> &Storage, StringRef Yaml) {
-  raw_svector_ostream OS(Storage);
-  yaml::Input YIn(Yaml);
-  if (!yaml::convertYAML(YIn, OS, [](const Twine &Msg) {}))
-    return createStringError(std::errc::invalid_argument,
-                             "unable to convert YAML");
-  return object::ObjectFile::createELFObjectFile(
-      MemoryBufferRef(OS.str(), "dummyELF"));
-}
-
-TEST(OffloadingBundleTest, checkExtractOffloadBundleFatBinary) {
-
-  // create a Memory Buffer with a fatbin offloading section
-  MemoryBufferRef mbuf;
-  StringRef FileName;
-  SmallVector<OffloadBundleEntry>();
-  SmallString<0> Storage;
-  // Expected<ELFObjectFile<ELF64LE>> ObjOrErr = toBinary<ELF64LE>(Storage, R"(
-  Expected<std::unique_ptr<ObjectFile>> ObjOrErr = toBinary(Storage, simpleAdd);
-
-  ASSERT_THAT_EXPECTED(ObjOrErr, Succeeded());
-
-  SmallVector<llvm::object::OffloadBundleFatBin> Bundles;
-  Error Err = extractOffloadBundleFatBinary(**ObjOrErr, Bundles);
-  EXPECT_FALSE(errorToBool(std::move(Err)));
-}
-
-TEST(OffloadingBundleTest, checkExtractCodeObject) {
-  // create a Memory Buffer with a fatbin offloading section
-  MemoryBufferRef mbuf;
-  StringRef FileName;
-  SmallVector<OffloadBundleEntry>();
-  SmallString<0> Storage;
-  // Expected<ELFObjectFile<ELF64LE>> ObjOrErr = toBinary<ELF64LE>(Storage, R"(
-  Expected<std::unique_ptr<ObjectFile>> ObjOrErr = toBinary(Storage, simpleAdd);
-
-  ASSERT_THAT_EXPECTED(ObjOrErr, Succeeded());
-
-  int64_t Offset = 8192;
-  int64_t Size = 4048;
-
-  Error Err = extractCodeObject(**ObjOrErr, Offset, Size,
-                                StringRef("checkExtractCodeObject.co"));
-  EXPECT_FALSE(errorToBool(std::move(Err)));
-}
-
-#endif

>From c81341f7fd9a59bf6f960f9704dc7dc45c711a91 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
Date: Fri, 9 May 2025 00:20:53 +0000
Subject: [PATCH 09/25] [gn build] Port 2299bada9f16

---
 llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn       | 1 -
 llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn | 1 -
 2 files changed, 2 deletions(-)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
index 883c648d83daa..3e381d975c1f0 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Object/BUILD.gn
@@ -39,7 +39,6 @@ static_library("Object") {
     "Object.cpp",
     "ObjectFile.cpp",
     "OffloadBinary.cpp",
-    "OffloadBundle.cpp",
     "RecordStreamer.cpp",
     "RelocationResolver.cpp",
     "SymbolSize.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
index 9fcb05c1a34dc..3a58a78db5169 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Object/BUILD.gn
@@ -18,7 +18,6 @@ unittest("ObjectTests") {
     "GOFFObjectFileTest.cpp",
     "MinidumpTest.cpp",
     "ObjectFileTest.cpp",
-    "OffloadingBundleTest.cpp",
     "OffloadingTest.cpp",
     "SymbolSizeTest.cpp",
     "SymbolicFileTest.cpp",

>From b80c3c576f169326f55956985706816cf7b170eb Mon Sep 17 00:00:00 2001
From: jimingham <jingham at apple.com>
Date: Thu, 8 May 2025 18:00:52 -0700
Subject: [PATCH 10/25] Branch no lld (#139187)

I suspect the test may be failing because lld doesn't behave the same
way the native Darwin linker does. Trying that theory here...
---
 lldb/test/API/macosx/branch-islands/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/test/API/macosx/branch-islands/Makefile b/lldb/test/API/macosx/branch-islands/Makefile
index 062e947f6d6ee..ff341522e15de 100644
--- a/lldb/test/API/macosx/branch-islands/Makefile
+++ b/lldb/test/API/macosx/branch-islands/Makefile
@@ -4,7 +4,7 @@ CFLAGS_EXTRAS := -std=c99
 include Makefile.rules
 
 a.out: main.o padding1.o padding2.o padding3.o padding4.o foo.o
-	${CC} ${LDFLAGS} foo.o padding1.o padding2.o padding3.o padding4.o main.o -o a.out
+	${CC} ${LDFLAGS} -fuse-ld=/usr/bin/ld foo.o padding1.o padding2.o padding3.o padding4.o main.o -o a.out
 
 %.o: $(SRCDIR)/%.s
 	${CC} -c $<

>From 803fd732ae634b49c308e88e9b508fdbff664034 Mon Sep 17 00:00:00 2001
From: jimingham <jingham at apple.com>
Date: Thu, 8 May 2025 18:22:10 -0700
Subject: [PATCH 11/25] Branch island no dsym (#139191)

When we get to the branch island, we don't see the symbol for it.

The only other thing I can think of is that this could be a dsymutil bug.
Let's try this with just DWARF, and then I'll have to revert all this
and see if I can reproduce this locally somehow.
---
 lldb/test/API/macosx/branch-islands/TestBranchIslands.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
index a8dd1886d5568..2d768d35aad03 100644
--- a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
+++ b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
@@ -15,7 +15,7 @@ class TestBranchIslandStepping(TestBase):
     @skipUnlessAppleSilicon
     def test_step_in_branch_island(self):
         """Make sure we can step in across a branch island"""
-        self.build()
+        self.build(debug_info="dwarf")
         self.main_source_file = lldb.SBFileSpec("main.c")
         self.do_test()
 

>From 74120d0a389584bd8d74073fb0c0b80af29f0a4c Mon Sep 17 00:00:00 2001
From: jimingham <jingham at apple.com>
Date: Thu, 8 May 2025 18:37:43 -0700
Subject: [PATCH 12/25] Revert branch island experiments (#139192)

This test is failing because when we step to what is the branch island
address and ask for its symbol, we can't resolve the symbol, and just
report it as the last padding symbol plus a very large offset.

That has nothing to do with the changes in this patch, but I'll revert
this and keep trying to figure out why symbol reading on this bot is
wrong.
---
 .../MacOSX-DYLD/DynamicLoaderDarwin.cpp       | 41 ++---------
 lldb/test/API/macosx/branch-islands/Makefile  | 16 -----
 .../branch-islands/TestBranchIslands.py       | 72 -------------------
 lldb/test/API/macosx/branch-islands/foo.c     |  6 --
 lldb/test/API/macosx/branch-islands/main.c    |  6 --
 .../test/API/macosx/branch-islands/padding1.s |  3 -
 .../test/API/macosx/branch-islands/padding2.s |  3 -
 .../test/API/macosx/branch-islands/padding3.s |  3 -
 .../test/API/macosx/branch-islands/padding4.s |  3 -
 9 files changed, 7 insertions(+), 146 deletions(-)
 delete mode 100644 lldb/test/API/macosx/branch-islands/Makefile
 delete mode 100644 lldb/test/API/macosx/branch-islands/TestBranchIslands.py
 delete mode 100644 lldb/test/API/macosx/branch-islands/foo.c
 delete mode 100644 lldb/test/API/macosx/branch-islands/main.c
 delete mode 100644 lldb/test/API/macosx/branch-islands/padding1.s
 delete mode 100644 lldb/test/API/macosx/branch-islands/padding2.s
 delete mode 100644 lldb/test/API/macosx/branch-islands/padding3.s
 delete mode 100644 lldb/test/API/macosx/branch-islands/padding4.s

diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
index 6c3040ef1a1da..e25c4ff55e408 100644
--- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
+++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp
@@ -26,7 +26,6 @@
 #include "lldb/Target/Thread.h"
 #include "lldb/Target/ThreadPlanCallFunction.h"
 #include "lldb/Target/ThreadPlanRunToAddress.h"
-#include "lldb/Target/ThreadPlanStepInstruction.h"
 #include "lldb/Utility/DataBuffer.h"
 #include "lldb/Utility/DataBufferHeap.h"
 #include "lldb/Utility/LLDBLog.h"
@@ -924,15 +923,15 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
   if (current_symbol != nullptr) {
     std::vector<Address> addresses;
 
-    ConstString current_name =
-        current_symbol->GetMangled().GetName(Mangled::ePreferMangled);
     if (current_symbol->IsTrampoline()) {
+      ConstString trampoline_name =
+          current_symbol->GetMangled().GetName(Mangled::ePreferMangled);
 
-      if (current_name) {
+      if (trampoline_name) {
         const ModuleList &images = target_sp->GetImages();
 
         SymbolContextList code_symbols;
-        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeCode,
+        images.FindSymbolsWithNameAndType(trampoline_name, eSymbolTypeCode,
                                           code_symbols);
         for (const SymbolContext &context : code_symbols) {
           Address addr = context.GetFunctionOrSymbolAddress();
@@ -946,8 +945,8 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
         }
 
         SymbolContextList reexported_symbols;
-        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeReExported,
-                                          reexported_symbols);
+        images.FindSymbolsWithNameAndType(
+            trampoline_name, eSymbolTypeReExported, reexported_symbols);
         for (const SymbolContext &context : reexported_symbols) {
           if (context.symbol) {
             Symbol *actual_symbol =
@@ -969,7 +968,7 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
         }
 
         SymbolContextList indirect_symbols;
-        images.FindSymbolsWithNameAndType(current_name, eSymbolTypeResolver,
+        images.FindSymbolsWithNameAndType(trampoline_name, eSymbolTypeResolver,
                                           indirect_symbols);
 
         for (const SymbolContext &context : indirect_symbols) {
@@ -1029,32 +1028,6 @@ DynamicLoaderDarwin::GetStepThroughTrampolinePlan(Thread &thread,
       thread_plan_sp = std::make_shared<ThreadPlanRunToAddress>(
           thread, load_addrs, stop_others);
     }
-    // One more case we have to consider is "branch islands".  These are regular
-    // TEXT symbols but their names end in .island plus maybe a .digit suffix.
-    // They are to allow arm64 code to branch further than the size of the
-    // address slot allows.  We just need to single-instruction step in that
-    // case.
-    static const char *g_branch_island_pattern = "\\.island\\.?[0-9]*$";
-    static RegularExpression g_branch_island_regex(g_branch_island_pattern);
-
-    bool is_branch_island = g_branch_island_regex.Execute(current_name);
-    // FIXME: this is extra logging so I can figure out why this test is failing
-    // on the bot but not locally with all the same tools, etc...
-    if (thread_plan_sp && is_branch_island) {
-      if (log) {
-        StreamString s;
-        thread_plan_sp->GetDescription(&s, eDescriptionLevelVerbose);
-        LLDB_LOGF(log, "Am at a branch island, but already had plan: \n\t%s", s.GetData());
-      }
-    }
-    if (!thread_plan_sp && is_branch_island) {
-      thread_plan_sp = std::make_shared<ThreadPlanStepInstruction>(
-          thread,
-          /* step_over= */ false, /* stop_others */ false, eVoteNoOpinion,
-          eVoteNoOpinion);
-      LLDB_LOG(log, "Stepping one instruction over branch island: '{0}'.",
-               current_name);
-    }
   } else {
     LLDB_LOGF(log, "Could not find symbol for step through.");
   }
diff --git a/lldb/test/API/macosx/branch-islands/Makefile b/lldb/test/API/macosx/branch-islands/Makefile
deleted file mode 100644
index ff341522e15de..0000000000000
--- a/lldb/test/API/macosx/branch-islands/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-C_SOURCES := main.c foo.c
-CFLAGS_EXTRAS := -std=c99
-
-include Makefile.rules
-
-a.out: main.o padding1.o padding2.o padding3.o padding4.o foo.o
-	${CC} ${LDFLAGS} -fuse-ld=/usr/bin/ld foo.o padding1.o padding2.o padding3.o padding4.o main.o -o a.out
-
-%.o: $(SRCDIR)/%.s
-	${CC} -c $<
-
-#padding1.o: padding1.s
-#	${CC} -c $(SRCDIR)/padding1.s
-
-#padding2.o: padding2.s
-#	${CC} -c $(SRCDIR)/padding2.s
diff --git a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py b/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
deleted file mode 100644
index 2d768d35aad03..0000000000000
--- a/lldb/test/API/macosx/branch-islands/TestBranchIslands.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-Make sure that we can step in across an arm64 branch island
-"""
-
-import os
-import lldb
-import lldbsuite.test.lldbutil as lldbutil
-from lldbsuite.test.lldbtest import *
-from lldbsuite.test.decorators import *
-
-
-class TestBranchIslandStepping(TestBase):
-    NO_DEBUG_INFO_TESTCASE = True
-
-    @skipUnlessAppleSilicon
-    def test_step_in_branch_island(self):
-        """Make sure we can step in across a branch island"""
-        self.build(debug_info="dwarf")
-        self.main_source_file = lldb.SBFileSpec("main.c")
-        self.do_test()
-
-    def do_test(self):
-        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
-            self, "Set a breakpoint here", self.main_source_file
-        )
-
-        # Make sure that we did manage to generate a branch island for foo:
-        syms = target.FindSymbols("foo.island", lldb.eSymbolTypeCode)
-        self.assertEqual(len(syms), 1, "We did generate an island for foo")
-
-        # Gathering some info to dump in case of failure:
-        trace_before = lldbutil.print_stacktrace(thread, True)
-        func_before = thread.frames[0].function
-
-        log_file_path = os.path.join(self.getBuildDir(), "step-log.txt")
-        self.runCmd(f"log enable -f {log_file_path} lldb step")
-        
-        thread.StepInto()
-        stop_frame = thread.frames[0]
-        # This is failing on the bot, but I can't reproduce the failure
-        # locally.  Let's see if we can dump some more info here to help
-        # figure out what went wrong...
-        if stop_frame.name.find("foo") == -1:
-            stream = lldb.SBStream()
-            print("Branch island symbols: ")
-            syms[0].GetDescription(stream)
-            for i in range(0, 6):
-                for sep in ["", "."]:
-                    syms = target.FindSymbols(
-                        f"foo.island{sep}{i}", lldb.eSymbolTypeCode
-                    )
-                    if len(syms) > 0:
-                        stream.Print("\n")
-                        syms[0].GetDescription(stream)
-
-            print(stream.GetData())
-            print(f"Start backtrace:")
-            print(trace_before)
-            print(f"\n'main' disassembly:\n{lldbutil.disassemble(target, func_before)}")
-            print("\nEnd backtrace:\n")
-            lldbutil.print_stacktrace(thread)
-            print(
-                f"\nStop disassembly:\n {lldbutil.disassemble(target, stop_frame.function)}"
-            )
-            with open(log_file_path, "r") as f:
-                data = f.read()
-                print("Step Log:")
-                print(data)
-
-        self.assertIn("foo", stop_frame.name, "Stepped into foo")
-        var = stop_frame.FindVariable("a_variable_in_foo")
-        self.assertTrue(var.IsValid(), "Found the variable in foo")
diff --git a/lldb/test/API/macosx/branch-islands/foo.c b/lldb/test/API/macosx/branch-islands/foo.c
deleted file mode 100644
index a5dd2e59e1d82..0000000000000
--- a/lldb/test/API/macosx/branch-islands/foo.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stdio.h>
-
-void foo() {
-  int a_variable_in_foo = 10;
-  printf("I am foo: %d.\n", a_variable_in_foo);
-}
diff --git a/lldb/test/API/macosx/branch-islands/main.c b/lldb/test/API/macosx/branch-islands/main.c
deleted file mode 100644
index b5578bdd715df..0000000000000
--- a/lldb/test/API/macosx/branch-islands/main.c
+++ /dev/null
@@ -1,6 +0,0 @@
-extern void foo();
-
-int main() {
-  foo(); // Set a breakpoint here
-  return 0;
-}
diff --git a/lldb/test/API/macosx/branch-islands/padding1.s b/lldb/test/API/macosx/branch-islands/padding1.s
deleted file mode 100644
index 4911e53b0240d..0000000000000
--- a/lldb/test/API/macosx/branch-islands/padding1.s
+++ /dev/null
@@ -1,3 +0,0 @@
-.text
-_padding1:
-.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding2.s b/lldb/test/API/macosx/branch-islands/padding2.s
deleted file mode 100644
index 5ad1bad11263b..0000000000000
--- a/lldb/test/API/macosx/branch-islands/padding2.s
+++ /dev/null
@@ -1,3 +0,0 @@
-.text
-_padding2:
-.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding3.s b/lldb/test/API/macosx/branch-islands/padding3.s
deleted file mode 100644
index 9f614eecf56d9..0000000000000
--- a/lldb/test/API/macosx/branch-islands/padding3.s
+++ /dev/null
@@ -1,3 +0,0 @@
-.text
-_padding3:
-.space 120*1024*1024
diff --git a/lldb/test/API/macosx/branch-islands/padding4.s b/lldb/test/API/macosx/branch-islands/padding4.s
deleted file mode 100644
index 12896cf5e5b8e..0000000000000
--- a/lldb/test/API/macosx/branch-islands/padding4.s
+++ /dev/null
@@ -1,3 +0,0 @@
-.text
-_padding4:
-.space 120*1024*1024

>From 035dcf6a2a706288c8abe1017c965a1033fca800 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 8 May 2025 22:08:31 -0400
Subject: [PATCH 13/25] [HIP] fix bundle ID for amdgcnspirv (#139112)

Currently ROCm 6.4.0 only recognizes spirv64-amd-amdhsa- in the bundle ID.
spirv64-amd-amdhsa-unknown causes all HIP apps compiled for amdgcnspirv
to fail.

Previously we fixed a similar issue for
amdgcn-amd-amdhsa-unknown. This patch extends that to
spirv64-amd-amdhsa-unknown.
---
 clang/lib/Driver/OffloadBundler.cpp                   | 10 +++++-----
 clang/test/Driver/clang-offload-bundler-standardize.c |  7 ++++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 859e44fb9bdb2..e7a737796925e 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -145,11 +145,11 @@ bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const {
 
 std::string OffloadTargetInfo::str() const {
   std::string NormalizedTriple;
-  // Unfortunately we need some special sauce for AMDGPU because all the runtime
-  // assumes the triple to be "amdgcn-amd-amdhsa-" (empty environment) instead
-  // of "amdgcn-amd-amdhsa-unknown". It's gonna be very tricky to patch
-  // different layers of runtime.
-  if (Triple.isAMDGPU()) {
+  // Unfortunately we need some special sauce for AMDHSA because all the runtime
+  // assumes the triple to be "amdgcn/spirv64-amd-amdhsa-" (empty environment)
+  // instead of "amdgcn/spirv64-amd-amdhsa-unknown". It's gonna be very tricky
+  // to patch different layers of runtime.
+  if (Triple.getOS() == Triple::OSType::AMDHSA) {
     NormalizedTriple = Triple.normalize(Triple::CanonicalForm::THREE_IDENT);
     NormalizedTriple.push_back('-');
   } else {
diff --git a/clang/test/Driver/clang-offload-bundler-standardize.c b/clang/test/Driver/clang-offload-bundler-standardize.c
index fd87fca4ff59d..5b831eec794d2 100644
--- a/clang/test/Driver/clang-offload-bundler-standardize.c
+++ b/clang/test/Driver/clang-offload-bundler-standardize.c
@@ -11,16 +11,17 @@
 //
 // RUN: echo 'Content of device file 1' > %t.tgt1
 // RUN: echo 'Content of device file 2' > %t.tgt2
-
+// RUN: echo 'Content of device file 3' > %t.tgt3
 //
 // Check code object compatibility for archive unbundling
 //
 // Create an object bundle
-// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908 -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle
+// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908,hip-spirv64-amd-amdhsa--amdgcnspirv -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -input=%t.tgt3 -output=%t.bundle
 
-// RUN: clang-offload-bundler -unbundle -type=o -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908 -input=%t.bundle -output=%t-hip-amdgcn-amd-amdhsa--gfx906.bc -output=%t-hip-amdgcn-amd-amdhsa--gfx908.bc -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLE
+// RUN: clang-offload-bundler -unbundle -type=o -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908,hip-spirv64-amd-amdhsa--amdgcnspirv -input=%t.bundle -output=%t-hip-amdgcn-amd-amdhsa--gfx906.bc -output=%t-hip-amdgcn-amd-amdhsa--gfx908.bc -output=%t-hip-spirv64-amd-amdhsa--amdgcnspirv.bc -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLE
 // BUNDLE: Compatible: Exact match: [CodeObject: hip-amdgcn-amd-amdhsa--gfx906] : [Target: hip-amdgcn-amd-amdhsa--gfx906]
 // BUNDLE: Compatible: Exact match: [CodeObject: hip-amdgcn-amd-amdhsa--gfx908] : [Target: hip-amdgcn-amd-amdhsa--gfx908]
+// BUNDLE: Compatible: Exact match: [CodeObject: hip-spirv64-amd-amdhsa--amdgcnspirv] : [Target: hip-spirv64-amd-amdhsa--amdgcnspirv]
 
 // Some code so that we can create a binary out of this file.
 int A = 0;

>From 8e895b50b9bc71bbdae92b1dea7189216b7764ab Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Thu, 8 May 2025 19:30:31 -0700
Subject: [PATCH 14/25] [MLGO][Docs] Add initial skeleton of MLGO docs

This adds an initial skeleton of the MLGO docs. This is intended to be
fleshed out over the next couple days as we work on filling out the
relevant sections on the tooling/features that are available in upstream
LLVM.

Reviewers: efriedma-quic, mtrofin

Reviewed By: mtrofin

Pull Request: https://github.com/llvm/llvm-project/pull/139177
---
 llvm/docs/MLGO.rst      | 28 ++++++++++++++++++++++++++++
 llvm/docs/Reference.rst |  1 +
 2 files changed, 29 insertions(+)
 create mode 100644 llvm/docs/MLGO.rst

diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
new file mode 100644
index 0000000000000..28518b83d8c3e
--- /dev/null
+++ b/llvm/docs/MLGO.rst
@@ -0,0 +1,28 @@
+====
+MLGO
+====
+
+Introduction
+============
+
+MLGO is a framework for integrating ML techniques systematically in LLVM. It is
+designed primarily to replace heuristics within LLVM with machine learned
+models. Currently there is upstream infrastructure for the following
+heuristics:
+
+* Inlining for size
+* Register allocation (LLVM greedy eviction heuristic) for performance
+
+This document is an outline of the tooling that composes MLGO.
+
+Corpus Tooling
+==============
+
+..
+    TODO(boomanaiden154): Write this section.
+
+Model Runner Interfaces
+=======================
+
+..
+    TODO(mtrofin): Write this section.
diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst
index e1f46b00f2b30..565d5c6876d66 100644
--- a/llvm/docs/Reference.rst
+++ b/llvm/docs/Reference.rst
@@ -41,6 +41,7 @@ LLVM and API reference documentation.
    PDB/index
    PointerAuth
    ScudoHardenedAllocator
+   MLGO
    MemoryModelRelaxationAnnotations
    MemTagSanitizer
    Security

>From 7c7ea1e0eb9fcaf39aefa05cb88f85b771afae25 Mon Sep 17 00:00:00 2001
From: no92 <no92 at users.noreply.github.com>
Date: Fri, 9 May 2025 04:37:17 +0200
Subject: [PATCH 15/25] [llvm] Add managarm support (#138854)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR is part of a series to upstream managarm support, as laid out in
the
[RFC](https://discourse.llvm.org/t/rfc-new-proposed-managarm-support-for-llvm-and-clang-87845/85884/1).
This PR is a follow-up to #87845.

The intention here is to add the managarm target to the LLVM support
lib, in preparation for a follow-up that would add the missing clang
driver bits.
---
 llvm/include/llvm/ADT/bit.h    |  2 +-
 llvm/lib/Support/Unix/Path.inc | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index 915a7de50aaff..8544efb5c45d4 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -30,7 +30,7 @@
 
 #if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) ||            \
     defined(__Fuchsia__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) ||  \
-    defined(__OpenBSD__) || defined(__DragonFly__)
+    defined(__OpenBSD__) || defined(__DragonFly__) || defined(__managarm__)
 #include <endian.h>
 #elif defined(_AIX)
 #include <sys/machine.h>
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 6004e007c0c7a..fae12f7efffe8 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -69,7 +69,8 @@ extern char **environ;
 
 #include <sys/types.h>
 #if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) &&   \
-    !defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(_AIX)
+    !defined(__linux__) && !defined(__FreeBSD_kernel__) && !defined(_AIX) &&   \
+    !defined(__managarm__)
 #include <sys/statvfs.h>
 #define STATVFS statvfs
 #define FSTATVFS fstatvfs
@@ -78,7 +79,7 @@ extern char **environ;
 #if defined(__OpenBSD__) || defined(__FreeBSD__)
 #include <sys/mount.h>
 #include <sys/param.h>
-#elif defined(__linux__)
+#elif defined(__linux__) || defined(__managarm__)
 #if defined(HAVE_LINUX_MAGIC_H)
 #include <linux/magic.h>
 #else
@@ -121,10 +122,12 @@ namespace fs {
 
 const file_t kInvalidFile = -1;
 
-#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) ||      \
-    defined(__FreeBSD_kernel__) || defined(__linux__) || defined(__CYGWIN__) || \
-    defined(__DragonFly__) || defined(_AIX) || defined(__GNU__) ||              \
-    (defined(__sun__) && defined(__svr4__) || defined(__HAIKU__))
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) ||     \
+    defined(__FreeBSD_kernel__) || defined(__linux__) ||                       \
+    defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) ||          \
+    defined(__GNU__) ||                                                        \
+    (defined(__sun__) && defined(__svr4__) || defined(__HAIKU__)) ||           \
+    defined(__managarm__)
 static int test_dir(char ret[PATH_MAX], const char *dir, const char *bin) {
   struct stat sb;
   char fullpath[PATH_MAX];
@@ -245,7 +248,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
   // If we don't have procfs mounted, fall back to argv[0]
   if (getprogpath(exe_path, argv0) != NULL)
     return exe_path;
-#elif defined(__linux__) || defined(__CYGWIN__) || defined(__gnu_hurd__)
+#elif defined(__linux__) || defined(__CYGWIN__) || defined(__gnu_hurd__) ||    \
+    defined(__managarm__)
   char exe_path[PATH_MAX];
   const char *aPath = "/proc/self/exe";
   if (sys::fs::exists(aPath)) {
@@ -472,7 +476,7 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
 }
 
 static bool is_local_impl(struct STATVFS &Vfs) {
-#if defined(__linux__) || defined(__GNU__)
+#if defined(__linux__) || defined(__GNU__) || defined(__managarm__)
 #ifndef NFS_SUPER_MAGIC
 #define NFS_SUPER_MAGIC 0x6969
 #endif

>From 4bcc083a72248bbf5bb4aaf70ba8008cfd9630e9 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Thu, 8 May 2025 19:38:01 -0700
Subject: [PATCH 16/25] [clang-format] Handle C# where clause in
 SeparateDefinitionBlocks (#139034)

Fix #61956
---
 clang/lib/Format/DefinitionBlockSeparator.cpp           | 4 ++++
 clang/unittests/Format/DefinitionBlockSeparatorTest.cpp | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index 319236d3bd618..ded51bc6c013c 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -137,6 +137,10 @@ void DefinitionBlockSeparator::separateBlocks(
     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
       assert(Direction >= -1);
       assert(Direction <= 1);
+
+      if (Lines[OpeningLineIndex]->First->is(TT_CSharpGenericTypeConstraint))
+        return true;
+
       const size_t OperateIndex = OpeningLineIndex + Direction;
       assert(OperateIndex < Lines.size());
       const auto &OperateLine = Lines[OperateIndex];
diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
index b26b9f4f4ff62..1f4245b703fb7 100644
--- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
+++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
@@ -574,6 +574,11 @@ TEST_F(DefinitionBlockSeparatorTest, CSharp) {
                "\r\n"
                "public class FoobarClass {\r\n"
                "  int foobar;\r\n"
+               "}\r\n"
+               "\r\n"
+               "public class LogFactory<TLogger>\r\n"
+               "    where TLogger : class, new() {\r\n"
+               "  int i;\r\n"
                "}",
                Style);
 }

>From db2d5762ebf61b95b0e414b461db68ac49d06b8c Mon Sep 17 00:00:00 2001
From: MingYan <99472920+NexMing at users.noreply.github.com>
Date: Fri, 9 May 2025 10:55:21 +0800
Subject: [PATCH 17/25] [flang][fir] Support promoting `fir.do_loop` with
 results to `affine.for`. (#137790)

Co-authored-by: yanming <ming.yan at terapines.com>
---
 .../Optimizer/Transforms/AffinePromotion.cpp  | 39 +++++++--
 flang/test/Fir/affine-promotion.fir           | 86 +++++++++++++++++++
 2 files changed, 120 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
index 43fccf52dc8ab..ef82e400bea14 100644
--- a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
@@ -49,8 +49,9 @@ struct AffineIfAnalysis;
 /// second when doing rewrite.
 struct AffineFunctionAnalysis {
   explicit AffineFunctionAnalysis(mlir::func::FuncOp funcOp) {
-    for (fir::DoLoopOp op : funcOp.getOps<fir::DoLoopOp>())
-      loopAnalysisMap.try_emplace(op, op, *this);
+    funcOp->walk([&](fir::DoLoopOp doloop) {
+      loopAnalysisMap.try_emplace(doloop, doloop, *this);
+    });
   }
 
   AffineLoopAnalysis getChildLoopAnalysis(fir::DoLoopOp op) const;
@@ -102,10 +103,23 @@ struct AffineLoopAnalysis {
     return true;
   }
 
+  bool analysisResults(fir::DoLoopOp loopOperation) {
+    if (loopOperation.getFinalValue() &&
+        !loopOperation.getResult(0).use_empty()) {
+      LLVM_DEBUG(
+          llvm::dbgs()
+              << "AffineLoopAnalysis: cannot promote loop final value\n";);
+      return false;
+    }
+
+    return true;
+  }
+
   bool analyzeLoop(fir::DoLoopOp loopOperation,
                    AffineFunctionAnalysis &functionAnalysis) {
     LLVM_DEBUG(llvm::dbgs() << "AffineLoopAnalysis: \n"; loopOperation.dump(););
     return analyzeMemoryAccess(loopOperation) &&
+           analysisResults(loopOperation) &&
            analyzeBody(loopOperation, functionAnalysis);
   }
 
@@ -461,14 +475,28 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
     LLVM_ATTRIBUTE_UNUSED auto loopAnalysis =
         functionAnalysis.getChildLoopAnalysis(loop);
     auto &loopOps = loop.getBody()->getOperations();
+    auto resultOp = cast<fir::ResultOp>(loop.getBody()->getTerminator());
+    auto results = resultOp.getOperands();
+    auto loopResults = loop->getResults();
     auto loopAndIndex = createAffineFor(loop, rewriter);
     auto affineFor = loopAndIndex.first;
     auto inductionVar = loopAndIndex.second;
 
+    if (loop.getFinalValue()) {
+      results = results.drop_front();
+      loopResults = loopResults.drop_front();
+    }
+
     rewriter.startOpModification(affineFor.getOperation());
     affineFor.getBody()->getOperations().splice(
         std::prev(affineFor.getBody()->end()), loopOps, loopOps.begin(),
         std::prev(loopOps.end()));
+    rewriter.replaceAllUsesWith(loop.getRegionIterArgs(),
+                                affineFor.getRegionIterArgs());
+    if (!results.empty()) {
+      rewriter.setInsertionPointToEnd(affineFor.getBody());
+      rewriter.create<affine::AffineYieldOp>(resultOp->getLoc(), results);
+    }
     rewriter.finalizeOpModification(affineFor.getOperation());
 
     rewriter.startOpModification(loop.getOperation());
@@ -479,7 +507,8 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
 
     LLVM_DEBUG(llvm::dbgs() << "AffineLoopConversion: loop rewriten to:\n";
                affineFor.dump(););
-    rewriter.replaceOp(loop, affineFor.getOperation()->getResults());
+    rewriter.replaceAllUsesWith(loopResults, affineFor->getResults());
+    rewriter.eraseOp(loop);
     return success();
   }
 
@@ -503,7 +532,7 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
         ValueRange(op.getUpperBound()),
         mlir::AffineMap::get(0, 1,
                              1 + mlir::getAffineSymbolExpr(0, op.getContext())),
-        step);
+        step, op.getIterOperands());
     return std::make_pair(affineFor, affineFor.getInductionVar());
   }
 
@@ -528,7 +557,7 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
         genericUpperBound.getResult(),
         mlir::AffineMap::get(0, 1,
                              1 + mlir::getAffineSymbolExpr(0, op.getContext())),
-        1);
+        1, op.getIterOperands());
     rewriter.setInsertionPointToStart(affineFor.getBody());
     auto actualIndex = rewriter.create<affine::AffineApplyOp>(
         op.getLoc(), actualIndexMap,
diff --git a/flang/test/Fir/affine-promotion.fir b/flang/test/Fir/affine-promotion.fir
index aae35c6ef5659..46467ab4a292a 100644
--- a/flang/test/Fir/affine-promotion.fir
+++ b/flang/test/Fir/affine-promotion.fir
@@ -131,3 +131,89 @@ func.func @loop_with_if(%a: !arr_d1, %v: f32) {
 // CHECK:   }
 // CHECK:   return
 // CHECK: }
+
+func.func @loop_with_result(%arg0: !fir.ref<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100x100xf32>>, %arg2: !fir.ref<!fir.array<100xf32>>) -> f32 {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %c100 = arith.constant 100 : index
+  %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %1 = fir.shape %c100, %c100 : (index, index) -> !fir.shape<2>
+  %2 = fir.alloca i32
+  %3:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %cst) -> (index, f32) {
+    %8 = fir.array_coor %arg0(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.addf %arg4, %9 fastmath<contract> : f32
+    %11 = arith.addi %arg3, %c1 overflow<nsw> : index
+    fir.result %11, %10 : index, f32
+  }
+  %4:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %3#1) -> (index, f32) {
+    %8 = fir.array_coor %arg1(%1) %c1, %arg3 : (!fir.ref<!fir.array<100x100xf32>>, !fir.shape<2>, index, index) -> !fir.ref<f32>
+    %9 = fir.convert %8 : (!fir.ref<f32>) -> !fir.ref<!fir.array<100xf32>>
+    %10 = fir.do_loop %arg5 = %c1 to %c100 step %c1 iter_args(%arg6 = %arg4) -> (f32) {
+      %12 = fir.array_coor %9(%0) %arg5 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+      %13 = fir.load %12 : !fir.ref<f32>
+      %14 = arith.addf %arg6, %13 fastmath<contract> : f32
+      fir.result %14 : f32
+    }
+    %11 = arith.addi %arg3, %c1 overflow<nsw> : index
+    fir.result %11, %10 : index, f32
+  }
+  %5:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %4#1, %arg5 = %cst) -> (f32, f32) {
+    %8 = fir.array_coor %arg0(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.addf %arg4, %9 fastmath<contract> : f32
+    %11 = fir.array_coor %arg2(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %12 = fir.load %11 : !fir.ref<f32>
+    %13 = arith.addf %arg5, %12 fastmath<contract> : f32
+    fir.result %10, %13 : f32, f32
+  }
+  %6 = arith.addf %5#0, %5#1 fastmath<contract> : f32
+  %7 = fir.convert %4#0 : (index) -> i32
+  fir.store %7 to %2 : !fir.ref<i32>
+  return %6 : f32
+}
+
+// CHECK-LABEL:   func.func @loop_with_result(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<100x100xf32>>,
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<!fir.array<100xf32>>) -> f32 {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 100 : index
+// CHECK:           %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_2]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_6:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:           %[[VAL_7:.*]] = affine.for %[[VAL_8:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_9:.*]] = %[[VAL_1]]) -> (f32) {
+// CHECK:             %[[VAL_10:.*]] = affine.apply #{{.*}}(%[[VAL_8]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:             %[[VAL_11:.*]] = affine.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
+// CHECK:             %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+// CHECK:             affine.yield %[[VAL_12]] : f32
+// CHECK:           }
+// CHECK:           %[[VAL_13:.*]]:2 = fir.do_loop %[[VAL_14:.*]] = %[[VAL_0]] to %[[VAL_2]] step %[[VAL_0]] iter_args(%[[VAL_15:.*]] = %[[VAL_7]]) -> (index, f32) {
+// CHECK:             %[[VAL_16:.*]] = fir.array_coor %[[ARG1]](%[[VAL_4]]) %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x100xf32>>, !fir.shape<2>, index, index) -> !fir.ref<f32>
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<f32>) -> !fir.ref<!fir.array<100xf32>>
+// CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:             %[[VAL_19:.*]] = affine.for %[[VAL_20:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_21:.*]] = %[[VAL_15]]) -> (f32) {
+// CHECK:               %[[VAL_22:.*]] = affine.apply #{{.*}}(%[[VAL_20]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:               %[[VAL_23:.*]] = affine.load %[[VAL_18]]{{\[}}%[[VAL_22]]] : memref<?xf32>
+// CHECK:               %[[VAL_24:.*]] = arith.addf %[[VAL_21]], %[[VAL_23]] fastmath<contract> : f32
+// CHECK:               affine.yield %[[VAL_24]] : f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_0]] overflow<nsw> : index
+// CHECK:             fir.result %[[VAL_25]], %[[VAL_19]] : index, f32
+// CHECK:           }
+// CHECK:           %[[VAL_26:.*]] = fir.convert %[[ARG2]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:           %[[VAL_27:.*]]:2 = affine.for %[[VAL_28:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_29:.*]] = %[[VAL_30:.*]]#1, %[[VAL_31:.*]] = %[[VAL_1]]) -> (f32, f32) {
+// CHECK:             %[[VAL_32:.*]] = affine.apply #{{.*}}(%[[VAL_28]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:             %[[VAL_33:.*]] = affine.load %[[VAL_6]]{{\[}}%[[VAL_32]]] : memref<?xf32>
+// CHECK:             %[[VAL_34:.*]] = arith.addf %[[VAL_29]], %[[VAL_33]] fastmath<contract> : f32
+// CHECK:             %[[VAL_35:.*]] = affine.load %[[VAL_26]]{{\[}}%[[VAL_32]]] : memref<?xf32>
+// CHECK:             %[[VAL_36:.*]] = arith.addf %[[VAL_31]], %[[VAL_35]] fastmath<contract> : f32
+// CHECK:             affine.yield %[[VAL_34]], %[[VAL_36]] : f32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_37:.*]] = arith.addf %[[VAL_38:.*]]#0, %[[VAL_38]]#1 fastmath<contract> : f32
+// CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_40:.*]]#0 : (index) -> i32
+// CHECK:           fir.store %[[VAL_39]] to %[[VAL_5]] : !fir.ref<i32>
+// CHECK:           return %[[VAL_37]] : f32
+// CHECK:         }

>From 78cc822aa6f5af0eda55089d22ba915b6d8e0216 Mon Sep 17 00:00:00 2001
From: lntue <lntue at google.com>
Date: Thu, 8 May 2025 21:23:09 -0600
Subject: [PATCH 18/25] [libc][math] Implement double precision acos correctly
 rounded for all rounding modes. (#138308)

We reduce the computation of `acos` to `asin` as follows:

When `|x| < 0.5`:
```math
acos(x) = \frac{\pi}{2} - asin(x).
```
For `0.5 <= |x| < 1`, let
```math
u = \frac{1 - \left| x \right|}{2},
```
then
```math
acos(x) = \begin{cases}
  2 \cdot asin \left( \sqrt{u} \right) &, 0.5 \leq x < 1 \\
  \pi - 2 \cdot asin \left( \sqrt{u} \right) &, -1 < x \leq -0.5
\end{cases}
```
---
 libc/config/darwin/arm/entrypoints.txt    |   1 +
 libc/config/linux/aarch64/entrypoints.txt |   1 +
 libc/config/linux/arm/entrypoints.txt     |   1 +
 libc/config/linux/riscv/entrypoints.txt   |   1 +
 libc/config/linux/x86_64/entrypoints.txt  |   1 +
 libc/config/windows/entrypoints.txt       |   1 +
 libc/docs/headers/math/index.rst          |   2 +-
 libc/include/math.yaml                    |   6 +
 libc/src/math/generic/CMakeLists.txt      |  21 ++
 libc/src/math/generic/acos.cpp            | 278 ++++++++++++++++++++++
 libc/src/math/generic/asin.cpp            |  14 +-
 libc/src/math/generic/asin_utils.h        |   8 +-
 libc/test/src/math/CMakeLists.txt         |  11 +
 libc/test/src/math/acos_test.cpp          |  82 +++++++
 libc/test/src/math/smoke/CMakeLists.txt   |  12 +
 libc/test/src/math/smoke/acos_test.cpp    |  64 +++++
 16 files changed, 496 insertions(+), 8 deletions(-)
 create mode 100644 libc/src/math/generic/acos.cpp
 create mode 100644 libc/test/src/math/acos_test.cpp
 create mode 100644 libc/test/src/math/smoke/acos_test.cpp

diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 70c888aec064c..308fc49d681d7 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -135,6 +135,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 66d7576ffb882..520046f768b5d 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -410,6 +410,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index b5e2f59d25a54..7432a7e912e81 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -242,6 +242,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 30d9d00dfefc9..c2a31b9f5c964 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -416,6 +416,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 2ac016f065b2e..9f447dd0d35d2 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -415,6 +415,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 37fa888d6498a..09021a08cf731 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -127,6 +127,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 9d4d6698b6122..6b0365f481a4c 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -249,7 +249,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | <Func>    | <Func_f> (float) | <Func> (double) | <Func_l> (long double) | <Func_f16> (float16) | <Func_f128> (float128) | C23 Definition Section | C23 Error Handling Section |
 +===========+==================+=================+========================+======================+========================+========================+============================+
-| acos      | |check|          |                 |                        | |check|              |                        | 7.12.4.1               | F.10.1.1                   |
+| acos      | |check|          | |check|         |                        | |check|              |                        | 7.12.4.1               | F.10.1.1                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | acosh     | |check|          |                 |                        | |check|              |                        | 7.12.5.1               | F.10.2.1                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index f8c57f7d48ab2..0a69653d156af 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -8,6 +8,12 @@ types:
 enums: []
 objects: []
 functions:
+  - name: acos
+    standards:
+      - stdc
+    return_type: double
+    arguments:
+      - type: double
   - name: acosf
     standards:
       - stdc
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index b08486ac61366..e5d1f9075bcb1 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4117,6 +4117,7 @@ add_entrypoint_object(
   HDRS
     ../asin.h
   DEPENDS
+    .asin_utils
     libc.src.__support.FPUtil.double_double
     libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
@@ -4164,6 +4165,26 @@ add_entrypoint_object(
     libc.src.__support.macros.properties.types  
 )
 
+add_entrypoint_object(
+  acos
+  SRCS
+    acos.cpp
+  HDRS
+    ../acos.h
+  DEPENDS
+    .asin_utils
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.types  
+    libc.src.__support.macros.properties.cpu_features
+)
+
 add_entrypoint_object(
   acospif16
   SRCS
diff --git a/libc/src/math/generic/acos.cpp b/libc/src/math/generic/acos.cpp
new file mode 100644
index 0000000000000..c14721faef3ce
--- /dev/null
+++ b/libc/src/math/generic/acos.cpp
@@ -0,0 +1,278 @@
+//===-- Double-precision acos function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/acos.h"
+#include "asin_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+using DoubleDouble = fputil::DoubleDouble;
+using Float128 = fputil::DyadicFloat<128>;
+
+LLVM_LIBC_FUNCTION(double, acos, (double x)) {
+  using FPBits = fputil::FPBits<double>;
+
+  FPBits xbits(x);
+  int x_exp = xbits.get_biased_exponent();
+
+  // |x| < 0.5.
+  if (x_exp < FPBits::EXP_BIAS - 1) {
+    // |x| < 2^-55.
+    if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 55)) {
+      // When |x| < 2^-55, acos(x) = pi/2
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS)
+      return PI_OVER_TWO.hi;
+#else
+      // Force the evaluation and prevent constant propagation so that it
+      // is rounded correctly for FE_UPWARD rounding mode.
+      return (xbits.abs().get_val() + 0x1.0p-160) + PI_OVER_TWO.hi;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    }
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    // acos(x) = pi/2 - asin(x)
+    //         = pi/2 - x * P(x^2)
+    double p = asin_eval(x * x);
+    return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo);
+#else
+    unsigned idx;
+    DoubleDouble x_sq = fputil::exact_mult(x, x);
+    double err = xbits.abs().get_val() * 0x1.0p-51;
+    // Polynomial approximation:
+    //   p ~ asin(x)/x
+    DoubleDouble p = asin_eval(x_sq, idx, err);
+    // asin(x) ~ x * p
+    DoubleDouble r0 = fputil::exact_mult(x, p.hi);
+    // acos(x) = pi/2 - asin(x)
+    //         ~ pi/2 - x * p
+    //         = pi/2 - x * (p.hi + p.lo)
+    double r_hi = fputil::multiply_add(-x, p.hi, PI_OVER_TWO.hi);
+    // Use Dekker's 2SUM algorithm to compute the lower part.
+    double r_lo = ((PI_OVER_TWO.hi - r_hi) - r0.hi) - r0.lo;
+    r_lo = fputil::multiply_add(-x, p.lo, r_lo + PI_OVER_TWO.lo);
+
+    // Ziv's accuracy test.
+
+    double r_upper = r_hi + (r_lo + err);
+    double r_lower = r_hi + (r_lo - err);
+
+    if (LIBC_LIKELY(r_upper == r_lower))
+      return r_upper;
+
+    // Ziv's accuracy test failed, perform 128-bit calculation.
+
+    // Recalculate mod 1/64.
+    idx = static_cast<unsigned>(fputil::nearest_integer(x_sq.hi * 0x1.0p6));
+
+    // Get x^2 - idx/64 exactly.  When FMA is available, double-double
+    // multiplication will be correct for all rounding modes.  Otherwise we use
+    // Float128 directly.
+    Float128 x_f128(x);
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+    // u = x^2 - idx/64
+    Float128 u_hi(
+        fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
+    Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo));
+#else
+    Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128);
+    Float128 u = fputil::quick_add(
+        x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6)));
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+    Float128 p_f128 = asin_eval(u, idx);
+    // Flip the sign of x_f128 to perform subtraction.
+    x_f128.sign = x_f128.sign.negate();
+    Float128 r =
+        fputil::quick_add(PI_OVER_TWO_F128, fputil::quick_mul(x_f128, p_f128));
+
+    return static_cast<double>(r);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  }
+  // |x| >= 0.5
+
+  double x_abs = xbits.abs().get_val();
+
+  // Maintaining the sign:
+  constexpr double SIGN[2] = {1.0, -1.0};
+  double x_sign = SIGN[xbits.is_neg()];
+  // |x| >= 1
+  if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
+    // |x| = 1:
+    if (x_abs == 1.0) {
+      // x = 1, acos(x) = 0,
+      // x = -1, acos(x) = pi
+      return x == 1.0 ? 0.0 : fputil::multiply_add(-x_sign, PI.hi, PI.lo);
+    }
+    // |x| > 1, return NaN.
+    if (xbits.is_quiet_nan())
+      return x;
+
+    // Set domain error for non-NaN input.
+    if (!xbits.is_nan())
+      fputil::set_errno_if_required(EDOM);
+
+    fputil::raise_except_if_required(FE_INVALID);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // When |x| >= 0.5, we perform range reduction as follows:
+  //
+  // When 0.5 <= x < 1, let:
+  //   y = acos(x)
+  // We will use the double angle formula:
+  //   cos(2y) = 1 - 2 sin^2(y)
+  // applied to the half angle y/2:
+  //   x = cos(y) = 1 - 2 sin^2 (y/2)
+  // So:
+  //   sin(y/2) = sqrt( (1 - x)/2 )
+  // And hence:
+  //   y/2 = asin( sqrt( (1 - x)/2 ) )
+  // Equivalently:
+  //   acos(x) = y = 2 * asin( sqrt( (1 - x)/2 ) )
+  // Let u = (1 - x)/2, then:
+  //   acos(x) = 2 * asin( sqrt(u) )
+  // Moreover, since 0.5 <= x < 1:
+  //   0 < u <= 1/4, and 0 < sqrt(u) <= 0.5,
+  // And hence we can reuse the same polynomial approximation of asin(x) when
+  // |x| <= 0.5:
+  //   acos(x) ~ 2 * sqrt(u) * P(u).
+  //
+  // When -1 < x <= -0.5, we reduce to the previous case using the formula:
+  //   acos(x) = pi - acos(-x)
+  //           = pi - 2 * asin ( sqrt( (1 + x)/2 ) )
+  //           ~ pi - 2 * sqrt(u) * P(u),
+  // where u = (1 - |x|)/2.
+
+  // u = (1 - |x|)/2
+  double u = fputil::multiply_add(x_abs, -0.5, 0.5);
+  // v_hi + v_lo ~ sqrt(u).
+  // Let:
+  //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
+  // Then:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //            ~ v_hi + h / (2 * v_hi)
+  // So we can use:
+  //   v_lo = h / (2 * v_hi).
+  double v_hi = fputil::sqrt<double>(u);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI};
+  DoubleDouble const_term = CONST_TERM[xbits.is_neg()];
+
+  double p = asin_eval(u);
+  double scale = x_sign * 2.0 * v_hi;
+  double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo);
+  return r;
+#else
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double h = fputil::multiply_add(v_hi, -v_hi, u);
+#else
+  DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi);
+  double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+  // Scale v_lo and v_hi by 2 from the formula:
+  //   vh = v_hi * 2
+  //   vl = 2*v_lo = h / v_hi.
+  double vh = v_hi * 2.0;
+  double vl = h / v_hi;
+
+  // Polynomial approximation:
+  //   p ~ asin(sqrt(u))/sqrt(u)
+  unsigned idx;
+  double err = vh * 0x1.0p-51;
+
+  DoubleDouble p = asin_eval(DoubleDouble{0.0, u}, idx, err);
+
+  // Perform computations in double-double arithmetic:
+  //   acos(|x|) ~ 2 * (v_hi + v_lo) * p = (vh + vl) * p
+  DoubleDouble r0 = fputil::quick_mult(DoubleDouble{vl, vh}, p);
+
+  double r_hi, r_lo;
+  if (xbits.is_pos()) {
+    r_hi = r0.hi;
+    r_lo = r0.lo;
+  } else {
+    DoubleDouble r = fputil::exact_add(PI.hi, -r0.hi);
+    r_hi = r.hi;
+    r_lo = (PI.lo - r0.lo) + r.lo;
+  }
+
+  // Ziv's accuracy test.
+
+  double r_upper = r_hi + (r_lo + err);
+  double r_lower = r_hi + (r_lo - err);
+
+  if (LIBC_LIKELY(r_upper == r_lower))
+    return r_upper;
+
+  // Ziv's accuracy test failed, we redo the computations in Float128.
+  // Recalculate mod 1/64.
+  idx = static_cast<unsigned>(fputil::nearest_integer(u * 0x1.0p6));
+
+  // After the first step of Newton-Raphson approximating v = sqrt(u), we have
+  // that:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //      v_lo = h / (2 * v_hi)
+  // With error:
+  //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
+  //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
+  // Since:
+  //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
+  // we can add another correction term to (v_hi + v_lo) that is:
+  //   v_ll = -h^2 / (2*v_hi * 4u)
+  //        = -v_lo * (h / 4u)
+  //        = -vl * (h / 8u),
+  // making the errors:
+  //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
+  // well beyond 128-bit precision needed.
+
+  // Get the rounding error of vl = 2 * v_lo ~ h / v_hi
+  // Get full product of v_hi * vl
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
+#else
+  DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl);
+  double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  // vll = 2*v_ll = -vl * (h / (4u)).
+  double t = h * (-0.25) / u;
+  double vll = fputil::multiply_add(vl, t, vl_lo);
+  // m_v = sign(x) * (vh + vl + vll) = sign(x) * 2 * (v_hi + v_lo + v_ll).
+  Float128 m_v = fputil::quick_add(
+      Float128(vh), fputil::quick_add(Float128(vl), Float128(vll)));
+  m_v.sign = xbits.sign();
+
+  // Perform computations in Float128:
+  //   acos(x) = (vh + vl + vll) * P(u)             , when 0.5 <= x < 1,
+  //           = pi - (vh + vl + vll) * P(u)        , when -1 < x <= -0.5.
+  Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u));
+
+  Float128 p_f128 = asin_eval(y_f128, idx);
+  Float128 r_f128 = fputil::quick_mul(m_v, p_f128);
+
+  if (xbits.is_neg())
+    r_f128 = fputil::quick_add(PI_F128, r_f128);
+
+  return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp
index ee85cb1988bb2..ad77683d1f880 100644
--- a/libc/src/math/generic/asin.cpp
+++ b/libc/src/math/generic/asin.cpp
@@ -74,7 +74,7 @@ LLVM_LIBC_FUNCTION(double, asin, (double x)) {
 #else
     unsigned idx;
     DoubleDouble x_sq = fputil::exact_mult(x, x);
-    double err = x * 0x1.0p-51;
+    double err = xbits.abs().get_val() * 0x1.0p-51;
     // Polynomial approximation:
     //   p ~ asin(x)/x
 
@@ -135,12 +135,14 @@ LLVM_LIBC_FUNCTION(double, asin, (double x)) {
                                   x_sign * PI_OVER_TWO.lo);
     }
     // |x| > 1, return NaN.
-    if (xbits.is_finite()) {
+    if (xbits.is_quiet_nan())
+      return x;
+
+    // Set domain error for non-NaN input.
+    if (!xbits.is_nan())
       fputil::set_errno_if_required(EDOM);
-      fputil::raise_except_if_required(FE_INVALID);
-    } else if (xbits.is_signaling_nan()) {
-      fputil::raise_except_if_required(FE_INVALID);
-    }
+
+    fputil::raise_except_if_required(FE_INVALID);
     return FPBits::quiet_nan().get_val();
   }
 
diff --git a/libc/src/math/generic/asin_utils.h b/libc/src/math/generic/asin_utils.h
index 0bdf25367f820..44913d573de2c 100644
--- a/libc/src/math/generic/asin_utils.h
+++ b/libc/src/math/generic/asin_utils.h
@@ -25,6 +25,8 @@ namespace {
 using DoubleDouble = fputil::DoubleDouble;
 using Float128 = fputil::DyadicFloat<128>;
 
+constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, 0x1.921fb54442d18p1};
+
 constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54,
                                       0x1.921fb54442d18p0};
 
@@ -172,7 +174,8 @@ LIBC_INLINE DoubleDouble asin_eval(const DoubleDouble &u, unsigned &idx,
   double y_hi = multiply_add(k, -0x1.0p-5, u.hi); // Exact
   DoubleDouble y = fputil::exact_add(y_hi, u.lo);
   double y2 = y.hi * y.hi;
-  err *= y2 + 0x1.0p-30;
+  // Add double-double errors in addition to the relative errors from y2.
+  err = fputil::multiply_add(err, y2, 0x1.0p-102);
   DoubleDouble c0 = fputil::quick_mult(
       y, DoubleDouble{ASIN_COEFFS[idx][3], ASIN_COEFFS[idx][2]});
   double c1 = multiply_add(y.hi, ASIN_COEFFS[idx][5], ASIN_COEFFS[idx][4]);
@@ -548,6 +551,9 @@ constexpr Float128 ASIN_COEFFS_F128[17][16] = {
 constexpr Float128 PI_OVER_TWO_F128 = {
     Sign::POS, -127, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128};
 
+constexpr Float128 PI_F128 = {Sign::POS, -126,
+                              0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128};
+
 LIBC_INLINE Float128 asin_eval(const Float128 &u, unsigned idx) {
   return fputil::polyeval(u, ASIN_COEFFS_F128[idx][0], ASIN_COEFFS_F128[idx][1],
                           ASIN_COEFFS_F128[idx][2], ASIN_COEFFS_F128[idx][3],
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index fba6b6f133163..9074524403102 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -2280,6 +2280,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  acos_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    acos_test.cpp
+  DEPENDS
+    libc.src.math.acos
+)
+
 add_fp_unittest(
   acosf16_test
   NEED_MPFR
diff --git a/libc/test/src/math/acos_test.cpp b/libc/test/src/math/acos_test.cpp
new file mode 100644
index 0000000000000..140488702f0bc
--- /dev/null
+++ b/libc/test/src/math/acos_test.cpp
@@ -0,0 +1,82 @@
+//===-- Unittests for acos ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/acos.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcAcosTest = LIBC_NAMESPACE::testing::FPTest<double>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+using LIBC_NAMESPACE::testing::tlog;
+
+TEST_F(LlvmLibcAcosTest, InDoubleRange) {
+  constexpr uint64_t COUNT = 123'451;
+  uint64_t START = FPBits(0x1.0p-60).uintval();
+  uint64_t STOP = FPBits(1.0).uintval();
+  uint64_t STEP = (STOP - START) / COUNT;
+
+  auto test = [&](mpfr::RoundingMode rounding_mode) {
+    mpfr::ForceRoundingMode __r(rounding_mode);
+    if (!__r.success)
+      return;
+
+    uint64_t fails = 0;
+    uint64_t count = 0;
+    uint64_t cc = 0;
+    double mx = 0.0, mr = 0.0;
+    double tol = 0.5;
+
+    for (uint64_t i = 0, v = START; i <= COUNT; ++i, v += STEP) {
+      double x = FPBits(v).get_val();
+      if (FPBits(v).is_inf_or_nan())
+        continue;
+      double result = LIBC_NAMESPACE::acos(x);
+      ++cc;
+      if (FPBits(result).is_inf_or_nan())
+        continue;
+
+      ++count;
+
+      if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Acos, x, result,
+                                             0.5, rounding_mode)) {
+        ++fails;
+        while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Acos, x,
+                                                  result, tol, rounding_mode)) {
+          mx = x;
+          mr = result;
+
+          if (tol > 1000.0)
+            break;
+
+          tol *= 2.0;
+        }
+      }
+    }
+    if (fails) {
+      tlog << " Acos failed: " << fails << "/" << count << "/" << cc
+           << " tests.\n";
+      tlog << "   Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
+      EXPECT_MPFR_MATCH(mpfr::Operation::Acos, mx, mr, 0.5, rounding_mode);
+    }
+  };
+
+  tlog << " Test Rounding To Nearest...\n";
+  test(mpfr::RoundingMode::Nearest);
+
+  tlog << " Test Rounding Downward...\n";
+  test(mpfr::RoundingMode::Downward);
+
+  tlog << " Test Rounding Upward...\n";
+  test(mpfr::RoundingMode::Upward);
+
+  tlog << " Test Rounding Toward Zero...\n";
+  test(mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index ca1ce7fb1f48c..55edffb4f5bdf 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -4051,6 +4051,18 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  acos_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    acos_test.cpp
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.errno.errno
+    libc.src.math.acos
+)
+
 add_fp_unittest(
   acosf16_test
   SUITE
diff --git a/libc/test/src/math/smoke/acos_test.cpp b/libc/test/src/math/smoke/acos_test.cpp
new file mode 100644
index 0000000000000..3a59bce264077
--- /dev/null
+++ b/libc/test/src/math/smoke/acos_test.cpp
@@ -0,0 +1,64 @@
+//===-- Unittests for acos ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/fenv_macros.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/acos.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcAcosTest = LIBC_NAMESPACE::testing::FPTest<double>;
+
+TEST_F(LlvmLibcAcosTest, SpecialNumbers) {
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(sNaN),
+                                           FE_INVALID);
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(aNaN));
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(zero));
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(neg_zero));
+
+  LIBC_NAMESPACE::libc_errno = 0;
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(inf),
+                                           FE_INVALID);
+  EXPECT_MATH_ERRNO(EDOM);
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(neg_inf),
+                                           FE_INVALID);
+  EXPECT_MATH_ERRNO(EDOM);
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(2.0),
+                                           FE_INVALID);
+  EXPECT_MATH_ERRNO(EDOM);
+  EXPECT_FP_EQ_WITH_EXCEPTION_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acos(-2.0),
+                                           FE_INVALID);
+  EXPECT_MATH_ERRNO(EDOM);
+  EXPECT_FP_EQ(zero, LIBC_NAMESPACE::acos(1.0));
+  EXPECT_FP_EQ(0x1.921fb54442d18p1, LIBC_NAMESPACE::acos(-1.0));
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(0x1.0p-54));
+}
+
+#ifdef LIBC_TEST_FTZ_DAZ
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAcosTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(min_denormal));
+}
+
+TEST_F(LlvmLibcAcosTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(min_denormal));
+}
+
+TEST_F(LlvmLibcAcosTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0x1.921fb54442d18p0, LIBC_NAMESPACE::acos(min_denormal));
+}
+
+#endif

>From 4190d6cc379a061e9d9ee9563a6f47a062a2ceef Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Fri, 9 May 2025 11:36:41 +0800
Subject: [PATCH 19/25] [NFC][RISCV] Add more test cases for multiplication
 (#139195)

---
 llvm/test/CodeGen/RISCV/mul.ll | 136 +++++++++++++++++++++++++++++++--
 1 file changed, 131 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 548c7e1c6ea8c..9447dcaf72373 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -494,6 +494,37 @@ define i32 @muli32_p14(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p18(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p18:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 18
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p18:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 18
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p18:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 18
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p18:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 18
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 18
+  ret i32 %1
+}
+
 define i32 @muli32_p28(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p28:
 ; RV32I:       # %bb.0:
@@ -554,6 +585,68 @@ define i32 @muli32_p30(i32 %a) nounwind {
   ret i32 %1
 }
 
+define i32 @muli32_p34(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p34:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 34
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p34:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 34
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p34:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 34
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p34:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 34
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 34
+  ret i32 %1
+}
+
+define i32 @muli32_p36(i32 %a) nounwind {
+; RV32I-LABEL: muli32_p36:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 36
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV32IM-LABEL: muli32_p36:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a1, 36
+; RV32IM-NEXT:    mul a0, a0, a1
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli32_p36:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 36
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64IM-LABEL: muli32_p36:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 36
+; RV64IM-NEXT:    mulw a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i32 %a, 36
+  ret i32 %1
+}
+
 define i32 @muli32_p56(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_p56:
 ; RV32I:       # %bb.0:
@@ -778,7 +871,40 @@ define i64 @muli64_p63(i64 %a) nounwind {
   ret i64 %1
 }
 
-
+define i64 @muli64_p72(i64 %a) nounwind {
+; RV32I-LABEL: muli64_p72:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 72
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32IM-LABEL: muli64_p72:
+; RV32IM:       # %bb.0:
+; RV32IM-NEXT:    li a2, 72
+; RV32IM-NEXT:    mul a1, a1, a2
+; RV32IM-NEXT:    mulhu a3, a0, a2
+; RV32IM-NEXT:    add a1, a3, a1
+; RV32IM-NEXT:    mul a0, a0, a2
+; RV32IM-NEXT:    ret
+;
+; RV64I-LABEL: muli64_p72:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 72
+; RV64I-NEXT:    tail __muldi3
+;
+; RV64IM-LABEL: muli64_p72:
+; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    li a1, 72
+; RV64IM-NEXT:    mul a0, a0, a1
+; RV64IM-NEXT:    ret
+  %1 = mul i64 %a, 72
+  ret i64 %1
+}
 
 define i32 @muli32_m63(i32 %a) nounwind {
 ; RV32I-LABEL: muli32_m63:
@@ -1327,10 +1453,10 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a5, a4
 ; RV32I-NEXT:    sub a6, a6, t2
 ; RV32I-NEXT:    mv t1, a7
-; RV32I-NEXT:    beq t0, a3, .LBB36_2
+; RV32I-NEXT:    beq t0, a3, .LBB40_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu t1, t0, a3
-; RV32I-NEXT:  .LBB36_2:
+; RV32I-NEXT:  .LBB40_2:
 ; RV32I-NEXT:    sub a2, a2, a1
 ; RV32I-NEXT:    sub a1, t0, a3
 ; RV32I-NEXT:    sub a5, a5, a4
@@ -1441,10 +1567,10 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32I-NEXT:    sltu a7, a3, a6
 ; RV32I-NEXT:    or t0, t0, a5
 ; RV32I-NEXT:    mv a5, a7
-; RV32I-NEXT:    beq a4, t0, .LBB37_2
+; RV32I-NEXT:    beq a4, t0, .LBB41_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    sltu a5, a4, t0
-; RV32I-NEXT:  .LBB37_2:
+; RV32I-NEXT:  .LBB41_2:
 ; RV32I-NEXT:    srli t1, a4, 26
 ; RV32I-NEXT:    slli t2, a2, 6
 ; RV32I-NEXT:    srli t3, a2, 26

>From d915355ef88e24b1b06192b8f878e2079f9f6165 Mon Sep 17 00:00:00 2001
From: Iris Shi <0.0 at owo.li>
Date: Fri, 9 May 2025 11:45:14 +0800
Subject: [PATCH 20/25] [NFC][RISCV] Pre-commit tests for RVI constant
 multiplication expansion (#139200)

---
 llvm/test/CodeGen/RISCV/mul-expand.ll | 490 ++++++++++++++++++++++++++
 1 file changed, 490 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/mul-expand.ll

diff --git a/llvm/test/CodeGen/RISCV/mul-expand.ll b/llvm/test/CodeGen/RISCV/mul-expand.ll
new file mode 100644
index 0000000000000..5bb74bc184d8b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/mul-expand.ll
@@ -0,0 +1,490 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+
+define i32 @muli32_0x555(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x555:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 1365
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0x555:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    li a1, 1365
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 1365
+  ret i32 %a1
+}
+
+define i64 @muli64_0x555(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x555:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a2, 1365
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x555:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 1365
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 1365
+  ret i64 %a1
+}
+
+define i32 @muli32_0x33333333(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x33333333:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0x33333333:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 858993459
+  ret i32 %a1
+}
+
+define i64 @muli64_0x33333333(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x33333333:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x33333333:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 209715
+; RV64I-NEXT:    addiw a1, a1, 819
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 858993459
+  ret i64 %a1
+}
+
+define i32 @muli32_0xaaaaaaaa(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0xaaaaaaaa:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 699051
+; RV32I-NEXT:    addi a1, a1, -1366
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0xaaaaaaaa:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 699051
+; RV64I-NEXT:    addiw a1, a1, -1366
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, -1431655766
+  ret i32 %a1
+}
+
+define i64 @muli64_0xaaaaaaaa(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0xaaaaaaaa:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 699051
+; RV32I-NEXT:    addi a2, a2, -1366
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0xaaaaaaaa:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 349525
+; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 2863311530
+  ret i64 %a1
+}
+
+define i32 @muli32_0x0fffffff(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x0fffffff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 28
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli32_0x0fffffff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 28
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 268435455
+  ret i32 %a1
+}
+
+define i64 @muli64_0x0fffffff(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x0fffffff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 28
+; RV32I-NEXT:    srli a3, a0, 4
+; RV32I-NEXT:    slli a4, a1, 28
+; RV32I-NEXT:    sltu a5, a2, a0
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    sub a1, a3, a1
+; RV32I-NEXT:    sub a1, a1, a5
+; RV32I-NEXT:    sub a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x0fffffff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 28
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i64 %a, 268435455
+  ret i64 %a1
+}
+
+define i32 @muli32_0xf0f0f0f0(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0xf0f0f0f0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 986895
+; RV32I-NEXT:    addi a1, a1, 240
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0xf0f0f0f0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 986895
+; RV64I-NEXT:    addiw a1, a1, 240
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, -252645136
+  ret i32 %a1
+}
+
+define i64 @muli64_0xf0f0f0f0(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0xf0f0f0f0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 986895
+; RV32I-NEXT:    addi a2, a2, 240
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0xf0f0f0f0:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
+; RV64I-NEXT:    slli a1, a1, 4
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 4042322160
+  ret i64 %a1
+}
+
+define i32 @muli32_0xf7f7f7f7(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0xf7f7f7f7:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 1015679
+; RV32I-NEXT:    addi a1, a1, 2039
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0xf7f7f7f7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 1015679
+; RV64I-NEXT:    addiw a1, a1, 2039
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, -134744073
+  ret i32 %a1
+}
+
+define i64 @muli64_0xf7f7f7f7(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0xf7f7f7f7:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 1015679
+; RV32I-NEXT:    addi a2, a2, 2039
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0xf7f7f7f7:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 248
+; RV64I-NEXT:    addiw a1, a1, -129
+; RV64I-NEXT:    slli a1, a1, 12
+; RV64I-NEXT:    addi a1, a1, 2039
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 4160223223
+  ret i64 %a1
+}
+
+define i32 @muli32_0x1000(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x1000:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 12
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli32_0x1000:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slliw a0, a0, 12
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 4096
+  ret i32 %a1
+}
+
+define i64 @muli64_0x1000(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x1000:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a0, 20
+; RV32I-NEXT:    slli a1, a1, 12
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a0, a0, 12
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x1000:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 12
+; RV64I-NEXT:    ret
+  %a1 = mul i64 %a, 4096
+  ret i64 %a1
+}
+
+define i32 @muli32_0x101(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x101:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    add a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli32_0x101:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    addw a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 257
+  ret i32 %a1
+}
+
+define i64 @muli64_0x101(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x101:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    add a0, a2, a0
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    sltu a2, a0, a2
+; RV32I-NEXT:    add a1, a3, a1
+; RV32I-NEXT:    add a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x101:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    add a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i64 %a, 257
+  ret i64 %a1
+}
+
+define i32 @muli32_0xfff(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0xfff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 12
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli32_0xfff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 12
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 4095
+  ret i32 %a1
+}
+
+define i64 @muli64_0xfff(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0xfff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 12
+; RV32I-NEXT:    srli a3, a0, 20
+; RV32I-NEXT:    slli a4, a1, 12
+; RV32I-NEXT:    sltu a5, a2, a0
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    sub a1, a3, a1
+; RV32I-NEXT:    sub a1, a1, a5
+; RV32I-NEXT:    sub a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0xfff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 12
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i64 %a, 4095
+  ret i64 %a1
+}
+
+define i32 @muli32_0x7fffffff(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x7fffffff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 31
+; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli32_0x7fffffff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 31
+; RV64I-NEXT:    subw a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 2147483647
+  ret i32 %a1
+}
+
+define i64 @muli64_0x7fffffff(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x7fffffff:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a2, a0, 31
+; RV32I-NEXT:    srli a3, a0, 1
+; RV32I-NEXT:    slli a4, a1, 31
+; RV32I-NEXT:    sltu a5, a2, a0
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    sub a1, a3, a1
+; RV32I-NEXT:    sub a1, a1, a5
+; RV32I-NEXT:    sub a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x7fffffff:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 31
+; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    ret
+  %a1 = mul i64 %a, 2147483647
+  ret i64 %a1
+}
+
+define i32 @muli32_0xdeadbeef(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0xdeadbeef:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 912092
+; RV32I-NEXT:    addi a1, a1, -273
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0xdeadbeef:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 912092
+; RV64I-NEXT:    addiw a1, a1, -273
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, -559038737
+  ret i32 %a1
+}
+
+define i64 @muli64_0xdeadbeef(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0xdeadbeef:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 912092
+; RV32I-NEXT:    addi a2, a2, -273
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0xdeadbeef:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 228023
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    addi a1, a1, -273
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 3735928559
+  ret i64 %a1
+}
+
+define i32 @muli32_0x12345678(i32 %a) nounwind {
+; RV32I-LABEL: muli32_0x12345678:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lui a1, 74565
+; RV32I-NEXT:    addi a1, a1, 1656
+; RV32I-NEXT:    tail __mulsi3
+;
+; RV64I-LABEL: muli32_0x12345678:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, 74565
+; RV64I-NEXT:    addiw a1, a1, 1656
+; RV64I-NEXT:    call __muldi3
+; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+  %a1 = mul i32 %a, 305419896
+  ret i32 %a1
+}
+
+define i64 @muli64_0x12345678(i64 %a) nounwind {
+; RV32I-LABEL: muli64_0x12345678:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a2, 74565
+; RV32I-NEXT:    addi a2, a2, 1656
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __muldi3
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: muli64_0x12345678:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    lui a1, 74565
+; RV64I-NEXT:    addiw a1, a1, 1656
+; RV64I-NEXT:    tail __muldi3
+  %a1 = mul i64 %a, 305419896
+  ret i64 %a1
+}

>From d926ec35b73b3952806fb6ee6af6bf2990729c26 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Fri, 9 May 2025 11:54:01 +0800
Subject: [PATCH 21/25] [X86] Asm modifier %a: add (%rip) for 64-bit static
 relocation model

In GCC,

```
static int a;
int foo() {
  asm("# %a0" : : "i"(&a));
}
```

lowers to `# a(%rip)` regardless of the PIC mode. This PR follows suit
for ELF -fno-pic, matching ELF -fpic (asm-modifier-pic.ll) and Mach-O
(which defaults to PIC).

Close https://github.com/llvm/llvm-project/issues/139001

Pull Request: https://github.com/llvm/llvm-project/pull/139040
---
 llvm/docs/LangRef.rst                 |  3 +++
 llvm/lib/Target/X86/X86AsmPrinter.cpp |  2 +-
 llvm/test/CodeGen/X86/asm-modifier.ll | 19 +++++++++++++------
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 568843a4486e5..7296bb84b7d95 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5776,6 +5776,7 @@ and GCC likely indicates a bug in LLVM.
 
 Target-independent:
 
+- ``a``: Print a memory reference. Targets might customize the output.
 - ``c``: Print an immediate integer constant unadorned, without
   the target-specific immediate punctuation (e.g. no ``$`` prefix).
 - ``n``: Negate and print immediate integer constant unadorned, without the
@@ -5913,6 +5914,8 @@ target-independent modifiers.
 
 X86:
 
+- ``a``: Print a memory reference. This displays as ``sym(%rip)`` for x86-64.
+  i386 should only use this with the static relocation model.
 - ``c``: Print an unadorned integer or symbol name. (The latter is
   target-specific behavior for this typically target-independent modifier).
 - ``A``: Print a register name with a '``*``' before it.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 5f5bfc70e8a1a..754f3f017fd29 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -744,7 +744,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
         llvm_unreachable("unexpected operand type!");
       case MachineOperand::MO_GlobalAddress:
         PrintSymbolOperand(MO, O);
-        if (Subtarget->isPICStyleRIPRel())
+        if (Subtarget->is64Bit())
           O << "(%rip)";
         return false;
       case MachineOperand::MO_Register:
diff --git a/llvm/test/CodeGen/X86/asm-modifier.ll b/llvm/test/CodeGen/X86/asm-modifier.ll
index e49e7d6b01964..7fa1e34a288da 100644
--- a/llvm/test/CodeGen/X86/asm-modifier.ll
+++ b/llvm/test/CodeGen/X86/asm-modifier.ll
@@ -6,12 +6,19 @@
 @var = internal global i32 0, align 4
 
 define dso_local void @test_a() nounwind {
-; CHECK-LABEL: test_a:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    #TEST 42 var#
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    ret{{[l|q]}}
+; X86-LABEL: test_a:
+; X86:       # %bb.0:
+; X86-NEXT:    #APP
+; X86-NEXT:    #TEST 42 var#
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_a:
+; X64:       # %bb.0:
+; X64-NEXT:    #APP
+; X64-NEXT:    #TEST 42 var(%rip)#
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    retq
   tail call void asm sideeffect "#TEST ${0:a} ${1:a}#", "i,i"(i32 42, ptr @var)
   ret void
 }

>From 823b1a582258f1417c648b3117ba08edc4855c68 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Thu, 8 May 2025 21:15:10 -0700
Subject: [PATCH 22/25] [clang-installapi] Store dylib attributes in the order
 they are passed on the command line. (#139087)

With the introduction of tbd-v5 holding rpaths, the order in which those
attributes are passed to `clang-installapi` must be represented in tbd
files. Previously, all dylib attributes were stored in a `StringMap`,
whose iteration order is not deterministic. Instead, hold them in a custom
collection backed by a vector, which preserves the order in which attributes
were added while still supporting lookup by attribute.
This makes the order of all diagnostics related to load command
comparisons stable.

This approach resolves errors when building with reverse-iteration.
---
 .../include/clang/InstallAPI/DylibVerifier.h  | 24 ++++++++-
 .../InstallAPI/DiagnosticBuilderWrappers.cpp  | 10 ++--
 .../InstallAPI/DiagnosticBuilderWrappers.h    |  3 +-
 clang/lib/InstallAPI/DylibVerifier.cpp        | 49 +++++++++++++------
 .../clang-installapi/ClangInstallAPI.cpp      |  6 +--
 clang/tools/clang-installapi/Options.cpp      | 41 +++++++---------
 6 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/InstallAPI/DylibVerifier.h b/clang/include/clang/InstallAPI/DylibVerifier.h
index 333f0cff077fd..4cf70a8adc9bc 100644
--- a/clang/include/clang/InstallAPI/DylibVerifier.h
+++ b/clang/include/clang/InstallAPI/DylibVerifier.h
@@ -25,9 +25,31 @@ enum class VerificationMode {
   Pedantic,
 };
 
-using LibAttrs = llvm::StringMap<ArchitectureSet>;
 using ReexportedInterfaces = llvm::SmallVector<llvm::MachO::InterfaceFile, 8>;
 
+/// Represents dynamic library specific attributes that are tied to
+/// architecture slices. It is commonly used for comparing options
+/// passed on the command line to installapi and what exists in dylib load
+/// commands.
+class LibAttrs {
+public:
+  using Entry = std::pair<std::string, ArchitectureSet>;
+  using AttrsToArchs = llvm::SmallVector<Entry, 10>;
+
+  // Mutable access to architecture set tied to the input attribute.
+  ArchitectureSet &getArchSet(StringRef Attr);
+  // Get entry based on the attribute.
+  std::optional<Entry> find(StringRef Attr) const;
+  // Immutable access to underlying container.
+  const AttrsToArchs &get() const { return LibraryAttributes; };
+  // Mutable access to underlying container.
+  AttrsToArchs &get() { return LibraryAttributes; };
+  bool operator==(const LibAttrs &Other) const { return Other.get() == get(); };
+
+private:
+  AttrsToArchs LibraryAttributes;
+};
+
 // Pointers to information about a zippered declaration used for
 // querying and reporting violations against different
 // declarations that all map to the same symbol.
diff --git a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp
index c8d07f229902b..fd9db8113a41e 100644
--- a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp
+++ b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp
@@ -97,12 +97,12 @@ const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
 
 const clang::DiagnosticBuilder &
 operator<<(const clang::DiagnosticBuilder &DB,
-           const StringMapEntry<ArchitectureSet> &LibAttr) {
-  std::string IFAsString;
-  raw_string_ostream OS(IFAsString);
+           const clang::installapi::LibAttrs::Entry &LibAttr) {
+  std::string Entry;
+  raw_string_ostream OS(Entry);
 
-  OS << LibAttr.getKey() << " [ " << LibAttr.getValue() << " ]";
-  DB.AddString(IFAsString);
+  OS << LibAttr.first << " [ " << LibAttr.second << " ]";
+  DB.AddString(Entry);
   return DB;
 }
 
diff --git a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h
index 48cfefbf65e6b..ba24ee415dfcf 100644
--- a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h
+++ b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h
@@ -14,6 +14,7 @@
 #define LLVM_CLANG_INSTALLAPI_DIAGNOSTICBUILDER_WRAPPER_H
 
 #include "clang/Basic/Diagnostic.h"
+#include "clang/InstallAPI/DylibVerifier.h"
 #include "llvm/TextAPI/Architecture.h"
 #include "llvm/TextAPI/ArchitectureSet.h"
 #include "llvm/TextAPI/InterfaceFile.h"
@@ -42,7 +43,7 @@ const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB,
 
 const clang::DiagnosticBuilder &
 operator<<(const clang::DiagnosticBuilder &DB,
-           const StringMapEntry<ArchitectureSet> &LibAttr);
+           const clang::installapi::LibAttrs::Entry &LibAttr);
 
 } // namespace MachO
 } // namespace llvm
diff --git a/clang/lib/InstallAPI/DylibVerifier.cpp b/clang/lib/InstallAPI/DylibVerifier.cpp
index d5d760767b41f..45c84c00d9236 100644
--- a/clang/lib/InstallAPI/DylibVerifier.cpp
+++ b/clang/lib/InstallAPI/DylibVerifier.cpp
@@ -18,6 +18,25 @@ using namespace llvm::MachO;
 namespace clang {
 namespace installapi {
 
+ArchitectureSet &LibAttrs::getArchSet(StringRef Attr) {
+  auto *It = llvm::find_if(LibraryAttributes, [&Attr](const auto &Input) {
+    return Attr == Input.first;
+  });
+  if (It != LibraryAttributes.end())
+    return It->second;
+  LibraryAttributes.push_back({Attr.str(), ArchitectureSet()});
+  return LibraryAttributes.back().second;
+}
+
+std::optional<LibAttrs::Entry> LibAttrs::find(StringRef Attr) const {
+  auto *It = llvm::find_if(LibraryAttributes, [&Attr](const auto &Input) {
+    return Attr == Input.first;
+  });
+  if (It == LibraryAttributes.end())
+    return std::nullopt;
+  return *It;
+}
+
 /// Metadata stored about a mapping of a declaration to a symbol.
 struct DylibVerifier::SymbolContext {
   // Name to use for all querying and verification
@@ -825,13 +844,13 @@ bool DylibVerifier::verifyBinaryAttrs(const ArrayRef<Target> ProvidedTargets,
     DylibTargets.push_back(RS->getTarget());
     const BinaryAttrs &BinInfo = RS->getBinaryAttrs();
     for (const StringRef LibName : BinInfo.RexportedLibraries)
-      DylibReexports[LibName].set(DylibTargets.back().Arch);
+      DylibReexports.getArchSet(LibName).set(DylibTargets.back().Arch);
     for (const StringRef LibName : BinInfo.AllowableClients)
-      DylibClients[LibName].set(DylibTargets.back().Arch);
+      DylibClients.getArchSet(LibName).set(DylibTargets.back().Arch);
     // Compare attributes that are only representable in >= TBD_V5.
     if (FT >= FileType::TBD_V5)
       for (const StringRef Name : BinInfo.RPaths)
-        DylibRPaths[Name].set(DylibTargets.back().Arch);
+        DylibRPaths.getArchSet(Name).set(DylibTargets.back().Arch);
   }
 
   // Check targets first.
@@ -923,31 +942,33 @@ bool DylibVerifier::verifyBinaryAttrs(const ArrayRef<Target> ProvidedTargets,
     if (Provided == Dylib)
       return true;
 
-    for (const llvm::StringMapEntry<ArchitectureSet> &PAttr : Provided) {
-      const auto DAttrIt = Dylib.find(PAttr.getKey());
-      if (DAttrIt == Dylib.end()) {
-        Ctx.Diag->Report(DiagID_missing) << "binary file" << PAttr;
+    for (const LibAttrs::Entry &PEntry : Provided.get()) {
+      const auto &[PAttr, PArchSet] = PEntry;
+      auto DAttrEntry = Dylib.find(PAttr);
+      if (!DAttrEntry) {
+        Ctx.Diag->Report(DiagID_missing) << "binary file" << PEntry;
         if (Fatal)
           return false;
       }
 
-      if (PAttr.getValue() != DAttrIt->getValue()) {
-        Ctx.Diag->Report(DiagID_mismatch) << PAttr << *DAttrIt;
+      if (DAttrEntry && PArchSet != DAttrEntry->second) {
+        Ctx.Diag->Report(DiagID_mismatch) << PEntry << *DAttrEntry;
         if (Fatal)
           return false;
       }
     }
 
-    for (const llvm::StringMapEntry<ArchitectureSet> &DAttr : Dylib) {
-      const auto PAttrIt = Provided.find(DAttr.getKey());
-      if (PAttrIt == Provided.end()) {
-        Ctx.Diag->Report(DiagID_missing) << "installAPI option" << DAttr;
+    for (const LibAttrs::Entry &DEntry : Dylib.get()) {
+      const auto &[DAttr, DArchSet] = DEntry;
+      const auto &PAttrEntry = Provided.find(DAttr);
+      if (!PAttrEntry) {
+        Ctx.Diag->Report(DiagID_missing) << "installAPI option" << DEntry;
         if (!Fatal)
           continue;
         return false;
       }
 
-      if (PAttrIt->getValue() != DAttr.getValue()) {
+      if (PAttrEntry->second != DArchSet) {
         if (Fatal)
           llvm_unreachable("this case was already covered above.");
       }
diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp
index 37b69ccf4e00e..14e7b53d74b09 100644
--- a/clang/tools/clang-installapi/ClangInstallAPI.cpp
+++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp
@@ -170,9 +170,9 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
       [&IF](
           const auto &Attrs,
           std::function<void(InterfaceFile *, StringRef, const Target &)> Add) {
-        for (const auto &Lib : Attrs)
-          for (const auto &T : IF.targets(Lib.getValue()))
-            Add(&IF, Lib.getKey(), T);
+        for (const auto &[Attr, ArchSet] : Attrs.get())
+          for (const auto &T : IF.targets(ArchSet))
+            Add(&IF, Attr, T);
       };
 
   assignLibAttrs(Opts.LinkerOpts.AllowableClients,
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index 9bc168c8cd4f8..73f5470b82486 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -610,35 +610,35 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) {
 
   for (const Arg *A : ParsedArgs.filtered(OPT_allowable_client)) {
     auto It = ArgToArchMap.find(A);
-    LinkerOpts.AllowableClients[A->getValue()] =
+    LinkerOpts.AllowableClients.getArchSet(A->getValue()) =
         It != ArgToArchMap.end() ? It->second : ArchitectureSet();
     A->claim();
   }
 
   for (const Arg *A : ParsedArgs.filtered(OPT_reexport_l)) {
     auto It = ArgToArchMap.find(A);
-    LinkerOpts.ReexportedLibraries[A->getValue()] =
+    LinkerOpts.ReexportedLibraries.getArchSet(A->getValue()) =
         It != ArgToArchMap.end() ? It->second : ArchitectureSet();
     A->claim();
   }
 
   for (const Arg *A : ParsedArgs.filtered(OPT_reexport_library)) {
     auto It = ArgToArchMap.find(A);
-    LinkerOpts.ReexportedLibraryPaths[A->getValue()] =
+    LinkerOpts.ReexportedLibraryPaths.getArchSet(A->getValue()) =
         It != ArgToArchMap.end() ? It->second : ArchitectureSet();
     A->claim();
   }
 
   for (const Arg *A : ParsedArgs.filtered(OPT_reexport_framework)) {
     auto It = ArgToArchMap.find(A);
-    LinkerOpts.ReexportedFrameworks[A->getValue()] =
+    LinkerOpts.ReexportedFrameworks.getArchSet(A->getValue()) =
         It != ArgToArchMap.end() ? It->second : ArchitectureSet();
     A->claim();
   }
 
   for (const Arg *A : ParsedArgs.filtered(OPT_rpath)) {
     auto It = ArgToArchMap.find(A);
-    LinkerOpts.RPaths[A->getValue()] =
+    LinkerOpts.RPaths.getArchSet(A->getValue()) =
         It != ArgToArchMap.end() ? It->second : ArchitectureSet();
     A->claim();
   }
@@ -733,9 +733,9 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
   llvm::for_each(DriverOpts.Targets,
                  [&AllArchs](const auto &T) { AllArchs.set(T.first.Arch); });
   auto assignDefaultLibAttrs = [&AllArchs](LibAttrs &Attrs) {
-    for (StringMapEntry<ArchitectureSet> &Entry : Attrs)
-      if (Entry.getValue().empty())
-        Entry.setValue(AllArchs);
+    for (auto &[_, Archs] : Attrs.get())
+      if (Archs.empty())
+        Archs = AllArchs;
   };
   assignDefaultLibAttrs(LinkerOpts.AllowableClients);
   assignDefaultLibAttrs(LinkerOpts.ReexportedFrameworks);
@@ -789,7 +789,7 @@ std::pair<LibAttrs, ReexportedInterfaces> Options::getReexportedLibraries() {
     std::unique_ptr<InterfaceFile> Reexport = std::move(*ReexportIFOrErr);
     StringRef InstallName = Reexport->getInstallName();
     assert(!InstallName.empty() && "Parse error for install name");
-    Reexports.insert({InstallName, Archs});
+    Reexports.getArchSet(InstallName) = Archs;
     ReexportIFs.emplace_back(std::move(*Reexport));
     return true;
   };
@@ -802,39 +802,36 @@ std::pair<LibAttrs, ReexportedInterfaces> Options::getReexportedLibraries() {
   for (const PlatformType P : Platforms) {
     PathSeq PlatformSearchPaths = getPathsForPlatform(FEOpts.SystemFwkPaths, P);
     llvm::append_range(FwkSearchPaths, PlatformSearchPaths);
-    for (const StringMapEntry<ArchitectureSet> &Lib :
-         LinkerOpts.ReexportedFrameworks) {
-      std::string Name = (Lib.getKey() + ".framework/" + Lib.getKey()).str();
+    for (const auto &[Lib, Archs] : LinkerOpts.ReexportedFrameworks.get()) {
+      std::string Name = (Lib + ".framework/" + Lib);
       std::string Path = findLibrary(Name, *FM, FwkSearchPaths, {}, {});
       if (Path.empty()) {
-        Diags->Report(diag::err_cannot_find_reexport) << false << Lib.getKey();
+        Diags->Report(diag::err_cannot_find_reexport) << false << Lib;
         return {};
       }
       if (DriverOpts.TraceLibraryLocation)
         errs() << Path << "\n";
 
-      AccumulateReexports(Path, Lib.getValue());
+      AccumulateReexports(Path, Archs);
     }
     FwkSearchPaths.resize(FwkSearchPaths.size() - PlatformSearchPaths.size());
   }
 
-  for (const StringMapEntry<ArchitectureSet> &Lib :
-       LinkerOpts.ReexportedLibraries) {
-    std::string Name = "lib" + Lib.getKey().str() + ".dylib";
+  for (const auto &[Lib, Archs] : LinkerOpts.ReexportedLibraries.get()) {
+    std::string Name = "lib" + Lib + ".dylib";
     std::string Path = findLibrary(Name, *FM, {}, LinkerOpts.LibPaths, {});
     if (Path.empty()) {
-      Diags->Report(diag::err_cannot_find_reexport) << true << Lib.getKey();
+      Diags->Report(diag::err_cannot_find_reexport) << true << Lib;
       return {};
     }
     if (DriverOpts.TraceLibraryLocation)
       errs() << Path << "\n";
 
-    AccumulateReexports(Path, Lib.getValue());
+    AccumulateReexports(Path, Archs);
   }
 
-  for (const StringMapEntry<ArchitectureSet> &Lib :
-       LinkerOpts.ReexportedLibraryPaths)
-    AccumulateReexports(Lib.getKey(), Lib.getValue());
+  for (const auto &[Lib, Archs] : LinkerOpts.ReexportedLibraryPaths.get())
+    AccumulateReexports(Lib, Archs);
 
   return {std::move(Reexports), std::move(ReexportIFs)};
 }

>From 74e5a3b61e87c8c2b830286796b72dda67942b6b Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Thu, 8 May 2025 21:18:52 -0700
Subject: [PATCH 23/25] [clang] Remove "unknown" from availability diags
 (#138610)

Previously, diagnostics like `error: 'fNew' is unavailable: introduced
in macOS 11 unknown` were getting emitted when the active target triple
didn't have an environment tied to it. Guard against this case so that the
environment name is only printed when the target triple actually has one.
---
 clang/lib/AST/DeclBase.cpp                    | 26 +++++++++++--------
 .../attr-availability-erroneous-diags.c       | 10 +++++++
 2 files changed, 25 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Driver/attr-availability-erroneous-diags.c

diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index aea19c51401aa..2052c0c7cfe42 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -695,27 +695,31 @@ static AvailabilityResult CheckAvailability(ASTContext &Context,
   if (!A->getIntroduced().empty() &&
       EnclosingVersion < A->getIntroduced()) {
     IdentifierInfo *IIEnv = A->getEnvironment();
-    StringRef TargetEnv =
-        Context.getTargetInfo().getTriple().getEnvironmentName();
-    StringRef EnvName = llvm::Triple::getEnvironmentTypeName(
-        Context.getTargetInfo().getTriple().getEnvironment());
-    // Matching environment or no environment on attribute
-    if (!IIEnv || (!TargetEnv.empty() && IIEnv->getName() == TargetEnv)) {
+    auto &Triple = Context.getTargetInfo().getTriple();
+    StringRef TargetEnv = Triple.getEnvironmentName();
+    StringRef EnvName =
+        llvm::Triple::getEnvironmentTypeName(Triple.getEnvironment());
+    // Matching environment or no environment on attribute.
+    if (!IIEnv || (Triple.hasEnvironment() && IIEnv->getName() == TargetEnv)) {
       if (Message) {
         Message->clear();
         llvm::raw_string_ostream Out(*Message);
         VersionTuple VTI(A->getIntroduced());
-        Out << "introduced in " << PrettyPlatformName << " " << VTI << " "
-            << EnvName << HintMessage;
+        Out << "introduced in " << PrettyPlatformName << " " << VTI;
+        if (Triple.hasEnvironment())
+          Out << " " << EnvName;
+        Out << HintMessage;
       }
     }
-    // Non-matching environment or no environment on target
+    // Non-matching environment or no environment on target.
     else {
       if (Message) {
         Message->clear();
         llvm::raw_string_ostream Out(*Message);
-        Out << "not available on " << PrettyPlatformName << " " << EnvName
-            << HintMessage;
+        Out << "not available on " << PrettyPlatformName;
+        if (Triple.hasEnvironment())
+          Out << " " << EnvName;
+        Out << HintMessage;
       }
     }
 
diff --git a/clang/test/Driver/attr-availability-erroneous-diags.c b/clang/test/Driver/attr-availability-erroneous-diags.c
new file mode 100644
index 0000000000000..5e67a461f3e19
--- /dev/null
+++ b/clang/test/Driver/attr-availability-erroneous-diags.c
@@ -0,0 +1,10 @@
+// RUN: not %clang -target x86_64-apple-darwin9 -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK: error:
+// CHECK-SAME: 'f0' is unavailable: introduced in macOS 11
+// CHECK-NOT: unknown 
+
+void f0(void) __attribute__((availability(macosx,strict,introduced=11)));
+
+void client(void) {
+f0(); }

>From a1beb619403a781153c170f041f39a3bac1cebb7 Mon Sep 17 00:00:00 2001
From: Guy David <49722543+guy-david at users.noreply.github.com>
Date: Fri, 9 May 2025 07:25:35 +0300
Subject: [PATCH 24/25] [SimplifyLibCalls] Shrink sin, cos to sinf, cosf when
 allowed (#139082)

This optimization already exists, but for the libcall versions of these
functions and not for their intrinsic form.
Solves https://github.com/llvm/llvm-project/issues/139044.

There are probably more opportunities for other intrinsics, because the
switch-case in `LibCallSimplifier::optimizeCall` covers only `pow`,
`exp2`, `log`, `log2`, `log10`, `sqrt`, `memset`, `memcpy` and
`memmove`.
---
 .../lib/Transforms/Utils/SimplifyLibCalls.cpp |  5 ++
 llvm/test/Transforms/InstCombine/cos-1.ll     | 28 +++++---
 .../InstCombine/simplify-intrinsics.ll        | 69 +++++++++++++++++++
 3 files changed, 92 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/simplify-intrinsics.ll

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 941e787f91eff..94a79ad824370 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -4136,6 +4136,11 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
       return optimizeMemCpy(CI, Builder);
     case Intrinsic::memmove:
       return optimizeMemMove(CI, Builder);
+    case Intrinsic::sin:
+    case Intrinsic::cos:
+      if (UnsafeFPShrink)
+        return optimizeUnaryDoubleFP(CI, Builder, TLI, /*isPrecise=*/true);
+      return nullptr;
     default:
       return nullptr;
     }
diff --git a/llvm/test/Transforms/InstCombine/cos-1.ll b/llvm/test/Transforms/InstCombine/cos-1.ll
index 168d88fb3a942..7c66f27a7f5c2 100644
--- a/llvm/test/Transforms/InstCombine/cos-1.ll
+++ b/llvm/test/Transforms/InstCombine/cos-1.ll
@@ -435,11 +435,15 @@ define float @unary_negated_and_shrinkable_libcall(float %f) {
 ; TODO: It was ok to shrink the libcall, so the intrinsic should shrink too?
 
 define float @negated_and_shrinkable_intrinsic(float %f) {
-; ANY-LABEL: @negated_and_shrinkable_intrinsic(
-; ANY-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
-; ANY-NEXT:    [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
-; ANY-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS]] to float
-; ANY-NEXT:    ret float [[CONV2]]
+; NO-FLOAT-SHRINK-LABEL: @negated_and_shrinkable_intrinsic(
+; NO-FLOAT-SHRINK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT:    [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
+; NO-FLOAT-SHRINK-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS]] to float
+; NO-FLOAT-SHRINK-NEXT:    ret float [[CONV2]]
+;
+; DO-FLOAT-SHRINK-LABEL: @negated_and_shrinkable_intrinsic(
+; DO-FLOAT-SHRINK-NEXT:    [[COS:%.*]] = call float @llvm.cos.f32(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT:    ret float [[COS]]
 ;
   %conv1 = fpext float %f to double
   %neg = fsub double -0.0, %conv1
@@ -449,11 +453,15 @@ define float @negated_and_shrinkable_intrinsic(float %f) {
 }
 
 define float @unary_negated_and_shrinkable_intrinsic(float %f) {
-; ANY-LABEL: @unary_negated_and_shrinkable_intrinsic(
-; ANY-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
-; ANY-NEXT:    [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
-; ANY-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS]] to float
-; ANY-NEXT:    ret float [[CONV2]]
+; NO-FLOAT-SHRINK-LABEL: @unary_negated_and_shrinkable_intrinsic(
+; NO-FLOAT-SHRINK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT:    [[COS:%.*]] = call double @llvm.cos.f64(double [[CONV1]])
+; NO-FLOAT-SHRINK-NEXT:    [[CONV2:%.*]] = fptrunc double [[COS]] to float
+; NO-FLOAT-SHRINK-NEXT:    ret float [[CONV2]]
+;
+; DO-FLOAT-SHRINK-LABEL: @unary_negated_and_shrinkable_intrinsic(
+; DO-FLOAT-SHRINK-NEXT:    [[COS:%.*]] = call float @llvm.cos.f32(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT:    ret float [[COS]]
 ;
   %conv1 = fpext float %f to double
   %neg = fneg double %conv1
diff --git a/llvm/test/Transforms/InstCombine/simplify-intrinsics.ll b/llvm/test/Transforms/InstCombine/simplify-intrinsics.ll
new file mode 100644
index 0000000000000..8536512b8035f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplify-intrinsics.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S                             | FileCheck %s --check-prefixes=ANY,NO-FLOAT-SHRINK
+; RUN: opt < %s -passes=instcombine -enable-double-float-shrink -S | FileCheck %s --check-prefixes=ANY,DO-FLOAT-SHRINK
+
+declare double @llvm.cos.f64(double)
+declare float @llvm.cos.f32(float)
+
+declare double @llvm.sin.f64(double)
+declare float @llvm.sin.f32(float)
+
+; cos -> cosf
+
+define float @cos_no_fastmath(float %f) {
+; NO-FLOAT-SHRINK-LABEL: @cos_no_fastmath(
+; NO-FLOAT-SHRINK-NEXT:    [[D:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT:    [[RESULT:%.*]] = call double @llvm.cos.f64(double [[D]])
+; NO-FLOAT-SHRINK-NEXT:    [[TRUNCATED_RESULT:%.*]] = fptrunc double [[RESULT]] to float
+; NO-FLOAT-SHRINK-NEXT:    ret float [[TRUNCATED_RESULT]]
+;
+; DO-FLOAT-SHRINK-LABEL: @cos_no_fastmath(
+; DO-FLOAT-SHRINK-NEXT:    [[TMP1:%.*]] = call float @llvm.cos.f32(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT:    ret float [[TMP1]]
+;
+  %d = fpext float %f to double
+  %result = call double @llvm.cos.f64(double %d)
+  %truncated_result = fptrunc double %result to float
+  ret float %truncated_result
+}
+
+define float @cos_fastmath(float %f) {
+; ANY-LABEL: @cos_fastmath(
+; ANY-NEXT:    [[TMP1:%.*]] = call fast float @llvm.cos.f32(float [[F:%.*]])
+; ANY-NEXT:    ret float [[TMP1]]
+;
+  %d = fpext float %f to double
+  %result = call fast double @llvm.cos.f64(double %d)
+  %truncated_result = fptrunc double %result to float
+  ret float %truncated_result
+}
+
+; sin -> sinf
+
+define float @sin_no_fastmath(float %f) {
+; NO-FLOAT-SHRINK-LABEL: @sin_no_fastmath(
+; NO-FLOAT-SHRINK-NEXT:    [[D:%.*]] = fpext float [[F:%.*]] to double
+; NO-FLOAT-SHRINK-NEXT:    [[RESULT:%.*]] = call double @llvm.sin.f64(double [[D]])
+; NO-FLOAT-SHRINK-NEXT:    [[TRUNCATED_RESULT:%.*]] = fptrunc double [[RESULT]] to float
+; NO-FLOAT-SHRINK-NEXT:    ret float [[TRUNCATED_RESULT]]
+;
+; DO-FLOAT-SHRINK-LABEL: @sin_no_fastmath(
+; DO-FLOAT-SHRINK-NEXT:    [[TMP1:%.*]] = call float @llvm.sin.f32(float [[F:%.*]])
+; DO-FLOAT-SHRINK-NEXT:    ret float [[TMP1]]
+;
+  %d = fpext float %f to double
+  %result = call double @llvm.sin.f64(double %d)
+  %truncated_result = fptrunc double %result to float
+  ret float %truncated_result
+}
+
+define float @sin_fastmath(float %f) {
+; ANY-LABEL: @sin_fastmath(
+; ANY-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sin.f32(float [[F:%.*]])
+; ANY-NEXT:    ret float [[TMP1]]
+;
+  %d = fpext float %f to double
+  %result = call fast double @llvm.sin.f64(double %d)
+  %truncated_result = fptrunc double %result to float
+  ret float %truncated_result
+}

>From b5f6c0bd046ba3f7c4e8047518c2685b4857db63 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 9 May 2025 00:32:43 -0400
Subject: [PATCH 25/25] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and
 `v_cvt_f32_f16`

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  16 +-
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  14 +-
 .../inst-select-amdgcn.fcmp.constants.w32.mir |   8 +-
 .../inst-select-amdgcn.fcmp.constants.w64.mir |   8 +-
 .../AMDGPU/GlobalISel/inst-select-fptosi.mir  |  12 +-
 .../AMDGPU/GlobalISel/inst-select-fptoui.mir  |  12 +-
 .../AMDGPU/GlobalISel/inst-select-sitofp.mir  |   4 +-
 .../AMDGPU/GlobalISel/inst-select-uitofp.mir  |   4 +-
 .../AMDGPU/fix-sgpr-copies-f16-fake16.mir     |   2 +-
 ...schedule-regpressure-ilp-metric-spills.mir | 256 +++++++++---------
 .../gfx11_dasm_vop3_dpp16_from_vop1.txt       |   4 +-
 .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt |   4 +-
 .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt      |   4 +-
 .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt      |   4 +-
 .../gfx12_dasm_vop3_from_vop1_dpp16.txt       |   4 +-
 .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt |   4 +-
 16 files changed, 186 insertions(+), 174 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a90dace47fb2..4ec062916b29f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7658,11 +7658,19 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
       BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
           .addImm(16)
           .add(Inst.getOperand(1));
-      BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
-          .addImm(0) // src0_modifiers
-          .addReg(TmpReg)
-          .addImm(0)  // clamp
-          .addImm(0); // omod
+      // Hold the builder by value. The add*() chain returns a reference to the
+      // temporary from BuildMI, so binding it to a reference would dangle.
+      MachineInstrBuilder MIB =
+          BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+              .addImm(0) // src0_modifiers
+              .addReg(TmpReg)
+              .addImm(0)  // clamp
+              .addImm(0); // omod
+      // FIXME: this is a temporary workaround to support opsel for certain
+      // fake16 instructions. Need to remove this code after we have true16 for
+      // related instructions.
+      if (NewOpcode == AMDGPU::V_CVT_F32_F16_fake16_e64)
+        MIB.addImm(0); // op_sel0
     }
 
     MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 352a3f9c2d27f..5522d89855332 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -261,6 +261,11 @@ foreach vt = Reg32Types.types in {
   >;
 }
 
+let HasOpSel = 1 in {
+  def VOP_F16_F32_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F16_F32>;
+  def VOP_F32_F16_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F32_F16>;
+} // End HasOpSel = 1
+
 let isReMaterializable = 1 in {
 let SchedRW = [WriteDoubleCvt] in {
 // OMod clears exceptions when set in this instruction
@@ -301,15 +306,16 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
     defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
   let OtherPredicates = [UseRealTrue16Insts] in
     defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
-  let OtherPredicates = [UseFakeTrue16Insts] in
-    defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+  let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F16_F32_fake16
+      : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL,
+                 any_fpround>;
 } // End FPDPRounding = 1, isReMaterializable = 0
 let OtherPredicates = [NotHasTrue16BitInsts] in
   defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
 let OtherPredicates = [UseRealTrue16Insts] in
   defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
-let OtherPredicates = [UseFakeTrue16Insts] in
-  defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16
+    : VOP1Inst<"v_cvt_f32_f16_fake16", VOP_F32_F16_Fake16_OP_SEL, any_fpextend>;
 
 let SubtargetPredicate = HasBF16ConversionInsts in
 defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 49383135ab0c5..66c8d11bfcc9e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 828eb5d3fb40a..c7715eec76d37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index 03cb907f82a16..2c7eb23dab364 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -149,7 +149,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -196,7 +196,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -301,7 +301,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -350,7 +350,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -407,7 +407,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 521a0e8a2a796..489a6b360124f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -99,7 +99,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -146,7 +146,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -201,7 +201,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -300,7 +300,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -357,7 +357,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 3888ce87b46fd..72e8fc52917a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -101,7 +101,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_SITOFP %0
@@ -150,7 +150,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s16) = G_SITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
index 35d622dc57d18..1e3c4c8e596b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
@@ -115,7 +115,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_UITOFP %0
@@ -164,7 +164,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s16) = G_UITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 5d90bab1384eb..0427b741d2456 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -33,7 +33,7 @@ body:             |
     ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_fake16_e64_]], implicit $exec
-    ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:sreg_32 = COPY %1:vgpr_32
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
index aa0d1fe45e9a8..7ff3788096303 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -424,14 +424,14 @@ body:             |
     %264:vgpr_32 = V_LSHL_OR_B32_e64 %254, 8, %263, implicit $exec
     %265:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %266:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, implicit $mode, implicit $exec
-    %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, implicit $mode, implicit $exec
-    %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, implicit $mode, implicit $exec
-    %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, implicit $mode, implicit $exec
-    %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, implicit $mode, implicit $exec
-    %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, implicit $mode, implicit $exec
-    %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, implicit $mode, implicit $exec
-    %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, implicit $mode, implicit $exec
+    %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %275.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %273, 0, %274, 0, 0, implicit $mode, implicit $exec
     %275.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %271, 0, %272, 0, 0, implicit $mode, implicit $exec
     %275.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %269, 0, %270, 0, 0, implicit $mode, implicit $exec
@@ -446,14 +446,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %212.sub6, %212.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %277:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %278:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, implicit $mode, implicit $exec
-    %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, implicit $mode, implicit $exec
-    %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, implicit $mode, implicit $exec
-    %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, implicit $mode, implicit $exec
-    %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, implicit $mode, implicit $exec
-    %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, implicit $mode, implicit $exec
-    %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, implicit $mode, implicit $exec
-    %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, implicit $mode, implicit $exec
+    %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %287.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %285, 0, %286, 0, 0, implicit $mode, implicit $exec
     %287.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %283, 0, %284, 0, 0, implicit $mode, implicit $exec
     %287.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %281, 0, %282, 0, 0, implicit $mode, implicit $exec
@@ -465,14 +465,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %246.sub6, %246.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %288:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %289:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, implicit $mode, implicit $exec
-    %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, implicit $mode, implicit $exec
-    %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, implicit $mode, implicit $exec
-    %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, implicit $mode, implicit $exec
-    %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, implicit $mode, implicit $exec
-    %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, implicit $mode, implicit $exec
-    %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, implicit $mode, implicit $exec
-    %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, implicit $mode, implicit $exec
+    %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %298.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %296, 0, %297, 0, 0, implicit $mode, implicit $exec
     %298.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %294, 0, %295, 0, 0, implicit $mode, implicit $exec
     %298.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %292, 0, %293, 0, 0, implicit $mode, implicit $exec
@@ -485,14 +485,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %250.sub6, %250.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %300:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %301:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, implicit $mode, implicit $exec
-    %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, implicit $mode, implicit $exec
-    %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, implicit $mode, implicit $exec
-    %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, implicit $mode, implicit $exec
-    %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, implicit $mode, implicit $exec
-    %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, implicit $mode, implicit $exec
-    %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, implicit $mode, implicit $exec
-    %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, implicit $mode, implicit $exec
+    %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %310.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %308, 0, %309, 0, 0, implicit $mode, implicit $exec
     %310.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %306, 0, %307, 0, 0, implicit $mode, implicit $exec
     %310.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %304, 0, %305, 0, 0, implicit $mode, implicit $exec
@@ -506,14 +506,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %253.sub6, %253.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %313:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %314:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, implicit $mode, implicit $exec
-    %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, implicit $mode, implicit $exec
-    %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, implicit $mode, implicit $exec
-    %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, implicit $mode, implicit $exec
-    %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, implicit $mode, implicit $exec
-    %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, implicit $mode, implicit $exec
-    %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, implicit $mode, implicit $exec
-    %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, implicit $mode, implicit $exec
+    %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %323.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %321, 0, %322, 0, 0, implicit $mode, implicit $exec
     %323.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %319, 0, %320, 0, 0, implicit $mode, implicit $exec
     %323.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %317, 0, %318, 0, 0, implicit $mode, implicit $exec
@@ -526,14 +526,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %214.sub6, %214.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %325:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %326:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, implicit $mode, implicit $exec
-    %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, implicit $mode, implicit $exec
-    %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, implicit $mode, implicit $exec
-    %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, implicit $mode, implicit $exec
-    %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, implicit $mode, implicit $exec
-    %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, implicit $mode, implicit $exec
-    %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, implicit $mode, implicit $exec
-    %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, implicit $mode, implicit $exec
+    %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %335.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %333, 0, %334, 0, 0, implicit $mode, implicit $exec
     %335.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %331, 0, %332, 0, 0, implicit $mode, implicit $exec
     %335.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %329, 0, %330, 0, 0, implicit $mode, implicit $exec
@@ -546,14 +546,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %247.sub6, %247.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %337:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %338:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, implicit $mode, implicit $exec
-    %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, implicit $mode, implicit $exec
-    %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, implicit $mode, implicit $exec
-    %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, implicit $mode, implicit $exec
-    %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, implicit $mode, implicit $exec
-    %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, implicit $mode, implicit $exec
-    %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, implicit $mode, implicit $exec
-    %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, implicit $mode, implicit $exec
+    %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %347.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %345, 0, %346, 0, 0, implicit $mode, implicit $exec
     %347.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %343, 0, %344, 0, 0, implicit $mode, implicit $exec
     %347.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %341, 0, %342, 0, 0, implicit $mode, implicit $exec
@@ -567,14 +567,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %213.sub6, %213.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %350:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %351:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, implicit $mode, implicit $exec
-    %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, implicit $mode, implicit $exec
-    %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, implicit $mode, implicit $exec
-    %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, implicit $mode, implicit $exec
-    %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, implicit $mode, implicit $exec
-    %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, implicit $mode, implicit $exec
-    %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, implicit $mode, implicit $exec
-    %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, implicit $mode, implicit $exec
+    %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %360.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %358, 0, %359, 0, 0, implicit $mode, implicit $exec
     %360.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %356, 0, %357, 0, 0, implicit $mode, implicit $exec
     %360.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %354, 0, %355, 0, 0, implicit $mode, implicit $exec
@@ -588,14 +588,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %216.sub6, %216.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %363:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %364:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, implicit $mode, implicit $exec
-    %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, implicit $mode, implicit $exec
-    %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, implicit $mode, implicit $exec
-    %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, implicit $mode, implicit $exec
-    %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, implicit $mode, implicit $exec
-    %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, implicit $mode, implicit $exec
-    %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, implicit $mode, implicit $exec
-    %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, implicit $mode, implicit $exec
+    %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %373.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %371, 0, %372, 0, 0, implicit $mode, implicit $exec
     %373.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %369, 0, %370, 0, 0, implicit $mode, implicit $exec
     %373.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %367, 0, %368, 0, 0, implicit $mode, implicit $exec
@@ -608,14 +608,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %248.sub6, %248.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %375:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %376:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, implicit $mode, implicit $exec
-    %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, implicit $mode, implicit $exec
-    %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, implicit $mode, implicit $exec
-    %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, implicit $mode, implicit $exec
-    %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, implicit $mode, implicit $exec
-    %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, implicit $mode, implicit $exec
-    %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, implicit $mode, implicit $exec
-    %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, implicit $mode, implicit $exec
+    %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %385.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %383, 0, %384, 0, 0, implicit $mode, implicit $exec
     %385.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %381, 0, %382, 0, 0, implicit $mode, implicit $exec
     %385.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %379, 0, %380, 0, 0, implicit $mode, implicit $exec
@@ -627,14 +627,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %217.sub6, %217.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %386:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %387:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, implicit $mode, implicit $exec
-    %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, implicit $mode, implicit $exec
-    %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, implicit $mode, implicit $exec
-    %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, implicit $mode, implicit $exec
-    %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, implicit $mode, implicit $exec
-    %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, implicit $mode, implicit $exec
-    %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, implicit $mode, implicit $exec
-    %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, implicit $mode, implicit $exec
+    %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %396.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %394, 0, %395, 0, 0, implicit $mode, implicit $exec
     %396.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %392, 0, %393, 0, 0, implicit $mode, implicit $exec
     %396.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %390, 0, %391, 0, 0, implicit $mode, implicit $exec
@@ -647,14 +647,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %251.sub6, %251.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %398:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %399:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, implicit $mode, implicit $exec
-    %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, implicit $mode, implicit $exec
-    %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, implicit $mode, implicit $exec
-    %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, implicit $mode, implicit $exec
-    %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, implicit $mode, implicit $exec
-    %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, implicit $mode, implicit $exec
-    %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, implicit $mode, implicit $exec
-    %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, implicit $mode, implicit $exec
+    %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %408.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %406, 0, %407, 0, 0, implicit $mode, implicit $exec
     %408.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %404, 0, %405, 0, 0, implicit $mode, implicit $exec
     %408.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %402, 0, %403, 0, 0, implicit $mode, implicit $exec
@@ -667,14 +667,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %252.sub6, %252.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %410:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %411:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, implicit $mode, implicit $exec
-    %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, implicit $mode, implicit $exec
-    %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, implicit $mode, implicit $exec
-    %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, implicit $mode, implicit $exec
-    %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, implicit $mode, implicit $exec
-    %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, implicit $mode, implicit $exec
-    %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, implicit $mode, implicit $exec
-    %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, implicit $mode, implicit $exec
+    %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %420.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %418, 0, %419, 0, 0, implicit $mode, implicit $exec
     %420.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %416, 0, %417, 0, 0, implicit $mode, implicit $exec
     %420.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %414, 0, %415, 0, 0, implicit $mode, implicit $exec
@@ -687,14 +687,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %220.sub6, %220.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %422:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %423:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, implicit $mode, implicit $exec
-    %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, implicit $mode, implicit $exec
-    %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, implicit $mode, implicit $exec
-    %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, implicit $mode, implicit $exec
-    %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, implicit $mode, implicit $exec
-    %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, implicit $mode, implicit $exec
-    %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, implicit $mode, implicit $exec
-    %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, implicit $mode, implicit $exec
+    %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %432.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %430, 0, %431, 0, 0, implicit $mode, implicit $exec
     %432.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %428, 0, %429, 0, 0, implicit $mode, implicit $exec
     %432.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %426, 0, %427, 0, 0, implicit $mode, implicit $exec
@@ -707,14 +707,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %249.sub6, %249.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %434:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %435:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, implicit $mode, implicit $exec
-    %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, implicit $mode, implicit $exec
-    %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, implicit $mode, implicit $exec
-    %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, implicit $mode, implicit $exec
-    %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, implicit $mode, implicit $exec
-    %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, implicit $mode, implicit $exec
-    %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, implicit $mode, implicit $exec
-    %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, implicit $mode, implicit $exec
+    %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %444.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %442, 0, %443, 0, 0, implicit $mode, implicit $exec
     %444.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %440, 0, %441, 0, 0, implicit $mode, implicit $exec
     %444.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %438, 0, %439, 0, 0, implicit $mode, implicit $exec
@@ -727,14 +727,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %219.sub6, %219.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %446:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %447:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, implicit $mode, implicit $exec
-    %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, implicit $mode, implicit $exec
-    %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, implicit $mode, implicit $exec
-    %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, implicit $mode, implicit $exec
-    %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, implicit $mode, implicit $exec
-    %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, implicit $mode, implicit $exec
-    %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, implicit $mode, implicit $exec
-    %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, implicit $mode, implicit $exec
+    %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %456.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %454, 0, %455, 0, 0, implicit $mode, implicit $exec
     %456.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %452, 0, %453, 0, 0, implicit $mode, implicit $exec
     %456.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %450, 0, %451, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index 282ff229c57e6..4f1b40f0989ff 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -448,7 +448,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -644,7 +644,7 @@
 
 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index 5995762ce6ff1..228cb94582473 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -124,7 +124,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -200,7 +200,7 @@
 
 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index d7e73909286a2..a1b2ac8477783 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -504,7 +504,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
 # GFX11-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
 
 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
 # GFX11-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l            ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -699,7 +699,7 @@
 
 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
 # GFX11-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255              ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
 
 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
 # GFX11: v_cvt_f32_f64_e64 v5, v[1:2]            ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 3ccf6feac4cca..8cc1d769b5307 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -556,7 +556,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
 # GFX12-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
 
 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l            ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -751,7 +751,7 @@
 
 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
 # GFX12-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255              ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
 
 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
 # GFX12: v_cvt_f32_f64_e64 v5, v[1:2]            ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index a020b0ae46a37..8982c399c6af7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -478,7 +478,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -674,7 +674,7 @@
 
 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index ad3c673b4e390..81565cae6ea04 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -154,7 +154,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -230,7 +230,7 @@
 
 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]