[libc-commits] [flang] [libc] [lldb] [llvm] [libc][math] Refactor ffma implementation to header-only in src/__support/math folder (PR #175304)

Thu Feb 5 09:11:05 PST 2026

https://github.com/ProfessionalMenace updated https://github.com/llvm/llvm-project/pull/175304

>From 7074da0848f8f69ab352f68a69b86a3a820312aa Mon Sep 17 00:00:00 2001
From: Menace <vondracekadam00 at gmail.com>
Date: Sat, 10 Jan 2026 11:28:35 +0100
Subject: [PATCH 01/12] [libc][math] Refactor ffma implementation to
 header-only in src/__support/math folder.

---
 libc/shared/math.h                            |  1 +
 libc/shared/math/ffma.h                       | 23 ++++++++++++++++
 libc/src/__support/math/CMakeLists.txt        |  8 ++++++
 libc/src/__support/math/ffma.h                | 26 +++++++++++++++++++
 libc/src/math/generic/ffma.cpp                |  6 ++---
 libc/test/shared/CMakeLists.txt               |  1 +
 libc/test/shared/shared_math_test.cpp         |  1 +
 .../llvm-project-overlay/libc/BUILD.bazel     | 10 ++++++-
 8 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 libc/shared/math/ffma.h
 create mode 100644 libc/src/__support/math/ffma.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 70c6d375c22de..7d2adfe62a98f 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -56,6 +56,7 @@
 #include "math/expf16.h"
 #include "math/expm1.h"
 #include "math/expm1f.h"
+#include "math/ffma.h"
 #include "math/frexpf.h"
 #include "math/frexpf128.h"
 #include "math/frexpf16.h"
diff --git a/libc/shared/math/ffma.h b/libc/shared/math/ffma.h
new file mode 100644
index 0000000000000..1ff3e6fde76cf
--- /dev/null
+++ b/libc/shared/math/ffma.h
@@ -0,0 +1,23 @@
+//===-- Shared ffma function ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_FFMA_H
+#define LLVM_LIBC_SHARED_MATH_FFMA_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ffma.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::ffma;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_FFMA_H
\ No newline at end of file
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index f8622da52d983..e8e9a8eb532b3 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -594,6 +594,14 @@ add_header_library(
     libc.src.__support.math.exp10_float16_constants
 )
 
+add_header_library(
+  ffma.h
+  HDRS
+    ffma.h
+  DEPENDS
+    libc.src.__support.FPUtil.fma
+)
+
 add_header_library(
   frexpf128
   HDRS
diff --git a/libc/src/__support/math/ffma.h b/libc/src/__support/math/ffma.h
new file mode 100644
index 0000000000000..9fb328c326361
--- /dev/null
+++ b/libc/src/__support/math/ffma.h
@@ -0,0 +1,26 @@
+//===-- Implementation header for ffma --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FFMA_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_FFMA_H
+
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static float ffma(double x, double y, double z) {
+  return fputil::fma<float>(x, y, z);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_FFMA_H
diff --git a/libc/src/math/generic/ffma.cpp b/libc/src/math/generic/ffma.cpp
index a4c834ddd7986..20d374ba3463c 100644
--- a/libc/src/math/generic/ffma.cpp
+++ b/libc/src/math/generic/ffma.cpp
@@ -7,14 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/ffma.h"
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/ffma.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float, ffma, (double x, double y, double z)) {
-  return fputil::fma<float>(x, y, z);
+  return math::ffma(x, y, z);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 4482d6344ae03..e3ec8c8a50815 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -52,6 +52,7 @@ add_fp_unittest(
     libc.src.__support.math.exp10f16
     libc.src.__support.math.expf
     libc.src.__support.math.expf16
+    libc.src.__support.math.ffma
     libc.src.__support.math.frexpf
     libc.src.__support.math.frexpf128
     libc.src.__support.math.frexpf16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index f09b0dcd4ef9f..c8c998a7bc615 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -89,6 +89,7 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp2(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
+  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::ffma(0.0));
 }
 
 #ifdef LIBC_TYPES_HAS_FLOAT128
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 913e913572b14..e990c3272e3f2 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2804,6 +2804,14 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_ffma",
+    hdrs = ["src/__support/math/ffma.h"],
+    deps = [
+        ":__support_fputil_fma",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_frexpf128",
     hdrs = ["src/__support/math/frexpf128.h"],
@@ -3971,7 +3979,7 @@ libc_math_function(name = "fdivf128")
 libc_math_function(
     name = "ffma",
     additional_deps = [
-        ":__support_fputil_fma",
+        ":__support_math_ffma",
     ],
 )
 

>From a223e700441eedf09db810cf8a96d14b438a981e Mon Sep 17 00:00:00 2001
From: Menace <vondracekadam00 at gmail.com>
Date: Sat, 10 Jan 2026 12:35:32 +0100
Subject: [PATCH 02/12] Add trailing newline to shared/math/ffma.h and switch ffma entrypoint dependency to __support.math

---
 libc/shared/math/ffma.h              | 3 ++-
 libc/src/math/generic/CMakeLists.txt | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libc/shared/math/ffma.h b/libc/shared/math/ffma.h
index 1ff3e6fde76cf..cbd24ac0a186f 100644
--- a/libc/shared/math/ffma.h
+++ b/libc/shared/math/ffma.h
@@ -20,4 +20,5 @@ using math::ffma;
 } // namespace shared
 } // namespace LIBC_NAMESPACE_DECL
 
-#endif // LLVM_LIBC_SHARED_MATH_FFMA_H
\ No newline at end of file
+#endif // LLVM_LIBC_SHARED_MATH_FFMA_H
+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index e343c1f15c3f1..930e6b6dc85f0 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3437,7 +3437,7 @@ add_entrypoint_object(
   HDRS
     ../ffma.h
   DEPENDS
-    libc.src.__support.FPUtil.fma
+    libc.src.__support.math.fma
 )
 
 add_entrypoint_object(

>From 5d9b6c9080c809816d62a64d06fb90568de82eea Mon Sep 17 00:00:00 2001
From: Menace <155697298+ProfessionalMenace at users.noreply.github.com>
Date: Sat, 10 Jan 2026 17:24:43 +0100
Subject: [PATCH 03/12] Apply suggestions from code review

Co-authored-by: Muhammad Bassiouni <60100307+bassiounix at users.noreply.github.com>
---
 libc/src/__support/math/CMakeLists.txt | 2 +-
 libc/src/__support/math/ffma.h         | 2 +-
 libc/src/math/generic/CMakeLists.txt   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 3b429dd6c692e..8c1425d9c2081 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -595,7 +595,7 @@ add_header_library(
 )
 
 add_header_library(
-  ffma.h
+  ffma
   HDRS
     ffma.h
   DEPENDS
diff --git a/libc/src/__support/math/ffma.h b/libc/src/__support/math/ffma.h
index 9fb328c326361..9d4bcfe453362 100644
--- a/libc/src/__support/math/ffma.h
+++ b/libc/src/__support/math/ffma.h
@@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-LIBC_INLINE static float ffma(double x, double y, double z) {
+LIBC_INLINE static constexpr float ffma(double x, double y, double z) {
   return fputil::fma<float>(x, y, z);
 }
 
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 1fdfb49631755..fd61a978f16e7 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3417,7 +3417,7 @@ add_entrypoint_object(
   HDRS
     ../ffma.h
   DEPENDS
-    libc.src.__support.math.fma
+    libc.src.__support.math.ffma
 )
 
 add_entrypoint_object(

>From bad7aaa6e08b2b2d5da7c7f711a72ba5a16c290c Mon Sep 17 00:00:00 2001
From: Menace <155697298+ProfessionalMenace at users.noreply.github.com>
Date: Sat, 10 Jan 2026 17:46:09 +0100
Subject: [PATCH 04/12] Fix shared math test to call ffma with all three arguments

---
 libc/test/shared/shared_math_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 75daa1859b2fc..38e36f48f6074 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -90,7 +90,7 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp2(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
-  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::ffma(0.0));
+  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::ffma(0.0, 0.0, 0.0));
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::shared::sin(0.0));
 }
 

>From 67c507ef19ac426ebc652b67b9a8e8107a4e1597 Mon Sep 17 00:00:00 2001
From: Menace <vondracekadam00 at gmail.com>
Date: Sat, 10 Jan 2026 18:22:58 +0100
Subject: [PATCH 05/12] fix formatting

---
 libc/shared/math/ffma.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libc/shared/math/ffma.h b/libc/shared/math/ffma.h
index cbd24ac0a186f..b8b0b97867ef9 100644
--- a/libc/shared/math/ffma.h
+++ b/libc/shared/math/ffma.h
@@ -21,4 +21,3 @@ using math::ffma;
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SHARED_MATH_FFMA_H
-

>From c10f65ef8e15a697e77995c86363e2d6455e7a44 Mon Sep 17 00:00:00 2001
From: Menace <155697298+ProfessionalMenace at users.noreply.github.com>
Date: Mon, 2 Feb 2026 10:42:16 +0100
Subject: [PATCH 06/12] removed constexpr

---
 libc/src/__support/math/ffma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/math/ffma.h b/libc/src/__support/math/ffma.h
index 9d4bcfe453362..9fb328c326361 100644
--- a/libc/src/__support/math/ffma.h
+++ b/libc/src/__support/math/ffma.h
@@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-LIBC_INLINE static constexpr float ffma(double x, double y, double z) {
+LIBC_INLINE static float ffma(double x, double y, double z) {
   return fputil::fma<float>(x, y, z);
 }
 

>From 7a81f69588f7c602e9a7d88979572a09a6565e70 Mon Sep 17 00:00:00 2001
From: ProfessionalMenace <vondracekadam00 at gmail.com>
Date: Mon, 2 Feb 2026 11:54:28 +0100
Subject: [PATCH 07/12] fixed up merging mishap

---
 libc/src/__support/math/CMakeLists.txt          | 17 ++++++++++-------
 .../bazel/llvm-project-overlay/libc/BUILD.bazel | 16 ++++++++++------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 84756421015b0..d50a15adc25af 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -593,6 +593,16 @@ add_header_library(
     libc.src.__support.math.exp10_float16_constants
 )
 
+add_header_library(
+  f16sqrt
+  HDRS
+    f16sqrt.h
+  DEPENDS
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.common
+    libc.src.__support.macros.config
+)
+
 add_header_library(
   f16sqrtl
   HDRS
@@ -608,13 +618,6 @@ add_header_library(
     ffma.h
   DEPENDS
     libc.src.__support.FPUtil.fma
-  f16sqrt
-  HDRS
-    f16sqrt.h
-  DEPENDS
-    libc.src.__support.FPUtil.sqrt
-    libc.src.__support.common
-    libc.src.__support.macros.config
 )
 
 add_header_library(
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 3560f9b471173..f310c6d0b8412 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2898,6 +2898,16 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_f16fma",
+    hdrs = ["src/__support/math/f16fma.h"],
+    deps = [
+        ":__support_fputil_fma",
+        ":__support_macros_config",
+        ":llvm_libc_macros_float16_macros",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_f16fmal",
     hdrs = ["src/__support/math/f16fmal.h"],
@@ -2914,12 +2924,6 @@ libc_support_library(
     hdrs = ["src/__support/math/ffma.h"],
     deps = [
         ":__support_fputil_fma",
-    name = "__support_math_f16fma",
-    hdrs = ["src/__support/math/f16fma.h"],
-    deps = [
-        ":__support_fputil_fma",
-        ":__support_macros_config",
-        ":llvm_libc_macros_float16_macros",
     ],
 )
 

>From 7bc7bfbe5e646a20bf63f4e73127397eda5ea7e6 Mon Sep 17 00:00:00 2001
From: Alexander Kornienko <alexfh at google.com>
Date: Thu, 5 Feb 2026 15:41:49 +0100
Subject: [PATCH 08/12] Revert "[LoopVectorize] Support vectorization of
 overflow intrinsics" (#179819)

Reverts llvm/llvm-project#174835, which causes clang crashes.

See
https://github.com/llvm/llvm-project/pull/174835#issuecomment-3844233831
and https://github.com/llvm/llvm-project/issues/179671 for details.
---
 llvm/lib/Analysis/VectorUtils.cpp             |  15 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |   7 +-
 .../AArch64/multiple-result-intrinsics.ll     |  74 +--
 .../multiple-result-intrinsics.ll             | 470 +-----------------
 .../Transforms/LoopVectorize/struct-return.ll |  33 +-
 5 files changed, 27 insertions(+), 572 deletions(-)

diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index d4083c49626fe..79723c9815445 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -65,12 +65,6 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::smul_fix_sat:
   case Intrinsic::umul_fix:
   case Intrinsic::umul_fix_sat:
-  case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow:
-  case Intrinsic::usub_with_overflow:
-  case Intrinsic::ssub_with_overflow:
-  case Intrinsic::umul_with_overflow:
-  case Intrinsic::smul_with_overflow:
   case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::asin:
   case Intrinsic::acos:
@@ -136,6 +130,15 @@ bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
   if (TTI && Intrinsic::isTargetIntrinsic(ID))
     return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
 
+  switch (ID) {
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::umul_with_overflow:
+  case Intrinsic::smul_with_overflow:
+    return true;
+  }
   return false;
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fa395e7d07531..898507d11a46b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1834,12 +1834,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
                                              State.TTI)) {
     Type *RetTy = toVectorizedTy(getResultType(), State.VF);
-    ArrayRef<Type *> ContainedTys = getContainedTypes(RetTy);
-    for (auto [Idx, Ty] : enumerate(ContainedTys)) {
-      if (isVectorIntrinsicWithStructReturnOverloadAtField(VectorIntrinsicID,
-                                                           Idx, State.TTI))
-        TysForDecl.push_back(Ty);
-    }
+    append_range(TysForDecl, getContainedTypes(RetTy));
   }
   SmallVector<Value *, 4> Args;
   for (const auto &I : enumerate(operands())) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
index 55994ad9a98f8..66247a4f8100e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extractvalue|store|with\.overflow)" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|modf|extractvalue|store)" --version 5
 ; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve < %s -S -o - -debug-only=loop-vectorize 2>%t.1 | FileCheck %s --check-prefix=CHECK
 ; RUN: opt -passes=loop-vectorize -mtriple=aarch64-gnu-linux -mcpu=neoverse-v1 -mattr=+sve -vector-library=ArmPL < %s -S -o - -debug-only=loop-vectorize 2>%t.2 | FileCheck %s --check-prefix=CHECK-ARMPL
 ; RUN: FileCheck --input-file=%t.1 --check-prefix=CHECK-COST %s
@@ -526,75 +526,3 @@ for.body:
 exit:
   ret void
 }
-
-; CHECK-COST-LABEL: sadd_with_overflow_i32
-; CHECK-COST: LV: Found an estimated cost of 1 for VF 1 For instruction:   %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %val_a, i32 %val_b)
-; CHECK-COST: Cost of 4 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of 4 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of 7 for VF 8: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of 13 for VF 16: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sadd.with.overflow.i32(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of 4 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST: Cost of 4 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-
-; CHECK-COST-ARMPL-LABEL: sadd_with_overflow_i32
-; CHECK-COST-ARMPL: LV: Found an estimated cost of 1 for VF 1 For instruction:   %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %val_a, i32 %val_b)
-; CHECK-COST-ARMPL: Cost of 4 for VF 2: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of 4 for VF 4: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of 7 for VF 8: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of 13 for VF 16: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @llvm.sadd.with.overflow.i32(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of 4 for VF vscale x 2: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-; CHECK-COST-ARMPL: Cost of 4 for VF vscale x 4: WIDEN-INTRINSIC ir<%call> = call llvm.sadd.with.overflow(ir<%val_a>, ir<%val_b>)
-
-define void @sadd_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @sadd_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) #[[ATTR0]] {
-; CHECK:  [[ENTRY:.*:]]
-; CHECK:  [[VECTOR_PH:.*:]]
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:    [[TMP9:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.sadd.with.overflow.nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD1:%.*]])
-; CHECK:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } [[TMP9]], 0
-; CHECK:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } [[TMP9]], 1
-; CHECK:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP10]], ptr align 4 [[TMP13:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; CHECK:    call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP12:%.*]], ptr align 1 [[TMP14:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK:  [[MIDDLE_BLOCK:.*:]]
-; CHECK:  [[EXIT:.*:]]
-;
-; CHECK-ARMPL-LABEL: define void @sadd_with_overflow_i32(
-; CHECK-ARMPL-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) #[[ATTR0]] {
-; CHECK-ARMPL:  [[ENTRY:.*:]]
-; CHECK-ARMPL:  [[VECTOR_PH:.*:]]
-; CHECK-ARMPL:  [[VECTOR_BODY:.*:]]
-; CHECK-ARMPL:    [[TMP9:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i1> } @llvm.sadd.with.overflow.nxv4i32(<vscale x 4 x i32> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i32> [[WIDE_MASKED_LOAD1:%.*]])
-; CHECK-ARMPL:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } [[TMP9]], 0
-; CHECK-ARMPL:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i1> } [[TMP9]], 1
-; CHECK-ARMPL:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP10]], ptr align 4 [[TMP13:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
-; CHECK-ARMPL:    call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP12:%.*]], ptr align 1 [[TMP14:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-ARMPL:  [[MIDDLE_BLOCK:.*:]]
-; CHECK-ARMPL:  [[EXIT:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
index f64d43adecfb8..c6fcbed983d3c 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|frexp|modf|extract|store|with\.overflow)" --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(:|sincos|frexp|modf|extract|store)" --version 5
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s -S -o - | FileCheck %s
 
 define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
@@ -348,471 +348,3 @@ for.body:
 exit:
   ret void
 }
-
-define void @uadd_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @uadd_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @uadd_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @uadd_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @sadd_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @sadd_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @sadd_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @sadd_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @usub_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @usub_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.usub.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @usub_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @usub_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @ssub_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @ssub_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @ssub_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @ssub_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @umul_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @umul_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.umul.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @umul_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @umul_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @smul_with_overflow_i32(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @smul_with_overflow_i32(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.smul.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD:%.*]], <2 x i32> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i32> [[TMP5]], ptr [[TMP9:%.*]], align 4
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i32, ptr %in_a, i64 %iv
-  %val_a = load i32, ptr %arrayidx_a, align 4
-  %arrayidx_b = getelementptr inbounds i32, ptr %in_b, i64 %iv
-  %val_b = load i32, ptr %arrayidx_b, align 4
-  %call = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %val_a, i32 %val_b)
-  %result = extractvalue { i32, i1 } %call, 0
-  %overflow = extractvalue { i32, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i32, ptr %out_result, i64 %iv
-  store i32 %result, ptr %arrayidx_result, align 4
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
-
-define void @smul_with_overflow_i64(ptr noalias %in_a, ptr noalias %in_b, ptr noalias writeonly %out_result, ptr noalias writeonly %out_overflow) {
-; CHECK-LABEL: define void @smul_with_overflow_i64(
-; CHECK-SAME: ptr noalias [[IN_A:%.*]], ptr noalias [[IN_B:%.*]], ptr noalias writeonly [[OUT_RESULT:%.*]], ptr noalias writeonly [[OUT_OVERFLOW:%.*]]) {
-; CHECK:  [[VECTOR_BODY:.*:]]
-; CHECK:  [[FOR_BODY:.*:]]
-; CHECK:  [[VECTOR_BODY1:.*:]]
-; CHECK:    [[TMP4:%.*]] = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> [[WIDE_LOAD:%.*]], <2 x i64> [[WIDE_LOAD1:%.*]])
-; CHECK:    [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 0
-; CHECK:    [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP4]], 1
-; CHECK:    store <2 x i64> [[TMP5]], ptr [[TMP9:%.*]], align 8
-; CHECK:    store <2 x i8> [[TMP8:%.*]], ptr [[TMP7:%.*]], align 1
-; CHECK:  [[EXIT:.*:]]
-; CHECK:  [[EXIT1:.*:]]
-;
-entry:
-  br label %for.body
-
-for.body:
-  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
-  %arrayidx_a = getelementptr inbounds i64, ptr %in_a, i64 %iv
-  %val_a = load i64, ptr %arrayidx_a, align 8
-  %arrayidx_b = getelementptr inbounds i64, ptr %in_b, i64 %iv
-  %val_b = load i64, ptr %arrayidx_b, align 8
-  %call = tail call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %val_a, i64 %val_b)
-  %result = extractvalue { i64, i1 } %call, 0
-  %overflow = extractvalue { i64, i1 } %call, 1
-  %zext_overflow = zext i1 %overflow to i8
-  %arrayidx_result = getelementptr inbounds i64, ptr %out_result, i64 %iv
-  store i64 %result, ptr %arrayidx_result, align 8
-  %arrayidx_overflow = getelementptr inbounds i8, ptr %out_overflow, i64 %iv
-  store i8 %zext_overflow, ptr %arrayidx_overflow, align 1
-  %iv.next = add nuw nsw i64 %iv, 1
-  %exitcond.not = icmp eq i64 %iv.next, 1024
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll
index 2f8acd641b571..83c87f1e15e8f 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll
@@ -166,31 +166,28 @@ exit:
   ret void
 }
 
-; CHECK-REMARKS:	 remark: {{.*}} vectorized loop
+; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
+; CHECK-REMARKS:         remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
 define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
 ; CHECK-LABEL: define void @test_overflow_intrinsic(
 ; CHECK-SAME: ptr noalias readonly [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
-; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
-; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[IV_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[IV]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD]])
-; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP1]], 1
-; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i8>
+; CHECK-NEXT:    [[IN_VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[IN_VAL]], i32 [[IN_VAL]])
+; CHECK-NEXT:    [[EXTRACT_RET:%.*]] = extractvalue { i32, i1 } [[CALL]], 0
+; CHECK-NEXT:    [[EXTRACT_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[CALL]], 1
+; CHECK-NEXT:    [[ZEXT_OVERFLOW:%.*]] = zext i1 [[EXTRACT_OVERFLOW]] to i8
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[OUT_A]], i64 [[IV]]
-; CHECK-NEXT:    store <2 x i32> [[TMP2]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    store i32 [[EXTRACT_RET]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[OUT_B]], i64 [[IV]]
-; CHECK-NEXT:    store <2 x i8> [[TMP4]], ptr [[ARRAYIDX4]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 2
+; CHECK-NEXT:    store i8 [[ZEXT_OVERFLOW]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;

>From ee6c9e3380bcee6c881a50ef16098b0d80b1c03e Mon Sep 17 00:00:00 2001
From: Soumik Kumar Basu <97104371+soumikiith at users.noreply.github.com>
Date: Thu, 5 Feb 2026 20:13:42 +0530
Subject: [PATCH 09/12] [flang][driver] Improve diagnostic for assembly inputs

When an assembly file is passed to the Flang driver, it currently fails
with a confusing internal error mentioning the unsupported `-cc1as`
tool.

This update detects assembly inputs early in the driver and emits a
clear, user-facing diagnostic explaining that Flang does not support
assembly files as input. The logic is implemented in a small helper
function and is intentionally temporary, so it can be removed once Flang
gains support for assembling inputs.

Fixes: #173528
---
 flang/test/Driver/asm-error-fix.s   |  9 +++++++++
 flang/test/lit.cfg.py               |  1 +
 flang/tools/flang-driver/driver.cpp | 27 +++++++++++++++++++++++++++
 3 files changed, 37 insertions(+)
 create mode 100644 flang/test/Driver/asm-error-fix.s

diff --git a/flang/test/Driver/asm-error-fix.s b/flang/test/Driver/asm-error-fix.s
new file mode 100644
index 0000000000000..725366de84eda
--- /dev/null
+++ b/flang/test/Driver/asm-error-fix.s
@@ -0,0 +1,9 @@
+! Test that flang rejects assembly files as input
+
+! RUN: not %flang -c %s 2>&1 | FileCheck %s
+
+! CHECK: error: flang does not support assembly files as input
+
+.globl foo
+foo:
+  ret
diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py
index 4221354df34a2..3a87f9ea06803 100644
--- a/flang/test/lit.cfg.py
+++ b/flang/test/lit.cfg.py
@@ -64,6 +64,7 @@
     ".ll",
     ".fir",
     ".mlir",
+    ".s",
 ]
 
 config.substitutions.append(("%PATH%", config.environment["PATH"]))
diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp
index 0840255a739f3..0e7d31a50e105 100644
--- a/flang/tools/flang-driver/driver.cpp
+++ b/flang/tools/flang-driver/driver.cpp
@@ -83,6 +83,28 @@ static void ExpandResponseFiles(llvm::StringSaver &saver,
   }
 }
 
+static bool rejectAssemblyInputs(const llvm::opt::ArgList &args,
+                                 clang::DiagnosticsEngine &diags) {
+  for (const llvm::opt::Arg *arg : args) {
+    if (arg->getOption().getKind() == llvm::opt::Option::InputClass) {
+      llvm::StringRef filename(arg->getValue());
+      llvm::StringRef ext = filename.rsplit('.').second;
+      clang::driver::types::ID type =
+          clang::driver::types::lookupTypeForExtension(ext);
+
+      if (type == clang::driver::types::TY_Asm ||
+          type == clang::driver::types::TY_PP_Asm) {
+        diags.Report(diags.getCustomDiagID(
+            clang::DiagnosticsEngine::Error,
+            "flang does not support assembly files as input: '%0'"))
+            << filename;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 int main(int argc, const char **argv) {
 
   // Initialize variables to call the driver
@@ -147,6 +169,11 @@ int main(int argc, const char **argv) {
   llvm::SmallVector<std::pair<int, const clang::driver::Command *>, 4>
       failingCommands;
 
+  // Reject assembly files as flang does not support assembly inputs.
+  // TODO: Since clang supports this, flang should too.
+  if (rejectAssemblyInputs(c->getInputArgs(), diags))
+    return 1;
+
   // Set the environment variable, FLANG_COMPILER_OPTIONS_STRING, to contain all
   // the compiler options. This is intended for the frontend driver,
   // flang -fc1, to enable the implementation of the COMPILER_OPTIONS

>From 229f68a7e51100ac30997422f813614b979321fa Mon Sep 17 00:00:00 2001
From: Minsoo Choo <minsoochoo0122 at proton.me>
Date: Thu, 5 Feb 2026 09:45:59 -0500
Subject: [PATCH 10/12] [lldb][docs] Add FreeBSD kernel debugging information
 (#179597)

Signed-off-by: Minsoo Choo <minsoochoo0122 at proton.me>
---
 lldb/docs/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/docs/index.rst b/lldb/docs/index.rst
index 3f6e17fd5e6fb..6dea61a246a48 100644
--- a/lldb/docs/index.rst
+++ b/lldb/docs/index.rst
@@ -74,6 +74,7 @@ are welcome:
 * iOS, tvOS, and watchOS device debugging on ARM and AArch64
 * Linux user-space debugging for i386, x86_64, ARM, AArch64, PPC64le, s390x
 * FreeBSD user-space debugging for i386, x86_64, ARM, AArch64, MIPS64, PPC
+* FreeBSD kernel debugging for i386, x86_64, AArch64
 * NetBSD user-space debugging for i386 and x86_64
 * Windows user-space debugging for i386, x86_64, ARM and AArch64 (*)
 

>From 662dc37d6c99eea74255cec09c80a1d9dfec926e Mon Sep 17 00:00:00 2001
From: Minsoo Choo <minsoochoo0122 at proton.me>
Date: Thu, 5 Feb 2026 09:51:46 -0500
Subject: [PATCH 11/12] [lldb] [Process/Utility] Update code url for FreeBSD in
 comments (#178556)

FreeBSD has moved from svn to git. Use https://cgit.freebsd.org/src
instead, as it is the source of truth. i386 and mips64 are no longer
supported as of FreeBSD 15 and 14, respectively, so link to the stable
branch instead of the main branch for those. See the [FreeBSD platforms
page](https://www.freebsd.org/platforms/).

---------

Signed-off-by: Minsoo Choo <minsoochoo0122 at proton.me>
---
 .../Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp     | 2 +-
 .../Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp   | 2 +-
 .../Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp  | 2 +-
 .../Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
index df6a82c11255e..841ae170466b5 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp
@@ -12,7 +12,7 @@
 using namespace lldb_private;
 using namespace lldb;
 
-// http://svnweb.freebsd.org/base/head/sys/x86/include/reg.h
+// https://cgit.freebsd.org/src/tree/sys/x86/include/reg.h?h=stable/14
 struct GPR {
   uint32_t fs;
   uint32_t es;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
index 1f52c09df12e7..8bc4e09870fce 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp
@@ -61,7 +61,7 @@ static const RegisterSet g_reg_sets_mips64[k_num_register_sets] = {
      g_fp_regnums_mips64},
 };
 
-// http://svnweb.freebsd.org/base/head/sys/mips/include/regnum.h
+// https://cgit.freebsd.org/src/tree/sys/mips/include/regnum.h?h=stable/13
 typedef struct _GPR {
   uint64_t zero;
   uint64_t r1;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
index d8dfa434335be..cd422556752fc 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp
@@ -13,7 +13,7 @@
 using namespace lldb_private;
 using namespace lldb;
 
-// http://svnweb.freebsd.org/base/head/sys/powerpc/include/reg.h
+// https://cgit.freebsd.org/src/tree/sys/powerpc/include/reg.h
 typedef struct _GPR64 {
   uint64_t r0;
   uint64_t r1;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
index c361b2abb726b..2a382175acfb3 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp
@@ -15,7 +15,7 @@
 using namespace lldb_private;
 using namespace lldb;
 
-// http://svnweb.freebsd.org/base/head/sys/x86/include/reg.h
+// https://cgit.freebsd.org/src/tree/sys/x86/include/reg.h
 typedef struct _GPR {
   uint64_t r15;
   uint64_t r14;

>From d6d453974f414227837e67f731d50123d250a8d8 Mon Sep 17 00:00:00 2001
From: ProfessionalMenace <vondracekadam00 at gmail.com>
Date: Thu, 5 Feb 2026 17:48:00 +0100
Subject: [PATCH 12/12] applies suggested changes

---
 libc/src/__support/math/ffma.h        | 2 +-
 libc/test/shared/shared_math_test.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/math/ffma.h b/libc/src/__support/math/ffma.h
index 9fb328c326361..64431e8bec407 100644
--- a/libc/src/__support/math/ffma.h
+++ b/libc/src/__support/math/ffma.h
@@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
 
-LIBC_INLINE static float ffma(double x, double y, double z) {
+LIBC_INLINE float ffma(double x, double y, double z) {
   return fputil::fma<float>(x, y, z);
 }
 
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 897165e4e5af3..8c77d8f2764e2 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -122,7 +122,7 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp2(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
-  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::ffma(0.0, 0.0, 0.0));
+  EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::ffma(0.0, 0.0, 0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::fsqrt(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::log(1.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::log10(1.0));