[clang] [llvm] [SVE2.1][Clang][LLVM]Int/FP reduce builtin in Clang and LLVM intrinsic (PR #69926)

via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 1 03:22:56 PST 2023


https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/69926

>From 9578865054e6fe83de496df7842fa991ba9c2541 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 23 Oct 2023 12:52:48 +0000
Subject: [PATCH 1/6] [SVE2.1][Clang][LLVM]Int/FP reduce builtin in Clang and
 LLVM intrinsic

This patch implements the builtins in Clang
and the LLVM-IR intrinsic for the following:

// Variants are also available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64,
// _f16, _f32, _f64uint8x16_t svaddqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
uint8x16_t svandqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t sveorqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t svorqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64;
uint8x16_t svmaxqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t svminqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for _f32, _f64
float16x8_t svmaxnmqv[_f16](svbool_t pg, svfloat16_t zn);
float16x8_t svminnmqv[_f16](svbool_t pg, svfloat16_t zn);

According to the PR#257[1]

The reduction instruction uses scalable vectors as input and fixed vectors
as output, therefore we changed SVEEmitter to emit fixed vector types in case
the neon header(arm_neon.h) is not present.

[1]https://github.com/ARM-software/acle/pull/257

Co-author: Dinar Temirbulatov <dinar.temirbulatov at arm.com>
---
 clang/include/clang/Basic/TargetBuiltins.h    |   2 +-
 clang/include/clang/Basic/arm_sve.td          |  17 +
 clang/include/clang/Basic/arm_sve_sme_incl.td |   2 +
 clang/lib/CodeGen/CGBuiltin.cpp               |   4 +
 .../acle_sve2p1_fp_reduce.c                   | 285 +++++++
 .../acle_sve2p1_int_reduce.c                  | 784 ++++++++++++++++++
 clang/utils/TableGen/SveEmitter.cpp           |  35 +-
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |  21 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  26 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  13 +-
 .../AArch64/sve2p1-intrinsics-fp-reduce.ll    | 189 +++++
 .../AArch64/sve2p1-intrinsics-int-reduce.ll   | 356 ++++++++
 12 files changed, 1715 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fp-reduce.ll
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-int-reduce.ll

diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 8f7881abf26f7f4..c9f9cbec7493bfc 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -309,7 +309,7 @@ namespace clang {
     bool isTupleSet() const { return Flags & IsTupleSet; }
     bool isReadZA() const { return Flags & IsReadZA; }
     bool isWriteZA() const { return Flags & IsWriteZA; }
-
+    bool isReductionQV() const { return Flags & IsReductionQV; }
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
   };
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b5baafedd139602..e8fef1e7a8dfb0d 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1859,6 +1859,23 @@ def SVBGRP   : SInst<"svbgrp[_{d}]",   "ddd", "UcUsUiUl", MergeNone, "aarch64_sv
 def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">;
 }
 
+// Standalone sve2.1 builtins
+let TargetGuard = "sve2p1" in {
+def SVORQV   : SInst<"svorqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orqv", [IsReductionQV]>;
+def SVEORQV  : SInst<"sveorqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorqv", [IsReductionQV]>;
+def SVADDQV  : SInst<"svaddqv[_{d}]", "{Pd", "hfdcsilUcUsUiUl", MergeNone, "aarch64_sve_addqv", [IsReductionQV]>;
+def SVANDQV  : SInst<"svandqv[_{d}]", "{Pd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andqv", [IsReductionQV]>;
+def SVSMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "csil", MergeNone, "aarch64_sve_smaxqv", [IsReductionQV]>;
+def SVUMAXQV : SInst<"svmaxqv[_{d}]", "{Pd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxqv", [IsReductionQV]>;
+def SVSMINQV : SInst<"svminqv[_{d}]", "{Pd", "csil", MergeNone, "aarch64_sve_sminqv", [IsReductionQV]>;
+def SVUMINQV : SInst<"svminqv[_{d}]", "{Pd", "UcUsUiUl", MergeNone, "aarch64_sve_uminqv", [IsReductionQV]>;
+
+def SVFMAXNMQV: SInst<"svmaxnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxnmqv", [IsReductionQV]>;
+def SVFMINNMQV: SInst<"svminnmqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminnmqv", [IsReductionQV]>;
+def SVFMAXQV: SInst<"svmaxqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fmaxqv", [IsReductionQV]>;
+def SVFMINQV: SInst<"svminqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminqv", [IsReductionQV]>;
+}
+
 let TargetGuard = "sve2p1" in {
 def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [], []>;
 def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone], []>;
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 3a7a5b51b25801e..9fe497173b56ac6 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -128,6 +128,7 @@
 // Z: const pointer to uint64_t
 
 // Prototype modifiers added for SVE2p1
+// {: 128b vector
 // }: svcount_t
 
 class MergeType<int val, string suffix=""> {
@@ -224,6 +225,7 @@ def IsSharedZA                : FlagType<0x8000000000>;
 def IsPreservesZA             : FlagType<0x10000000000>;
 def IsReadZA                  : FlagType<0x20000000000>;
 def IsWriteZA                 : FlagType<0x40000000000>;
+def IsReductionQV             : FlagType<0x80000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..86e77db4b914571 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9834,6 +9834,10 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
   if (TypeFlags.isOverloadCvt())
     return {Ops[0]->getType(), Ops.back()->getType()};
 
+  if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
+      ResultType->isVectorTy())
+    return {ResultType, Ops[1]->getType()};
+
   assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
   return {DefaultType};
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fp_reduce.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
new file mode 100644
index 000000000000000..e58cf4e49a37f92
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fp_reduce.c
@@ -0,0 +1,285 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// FADDQV
+
+// CHECK-LABEL: @test_svaddqv_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_svaddqv_f16(svbool_t pg, svfloat16_t op)
+{
+  return SVE_ACLE_FUNC(svaddqv,,_f16,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svaddqv_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.addqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.addqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_svaddqv_f32(svbool_t pg, svfloat32_t op)
+{
+  return SVE_ACLE_FUNC(svaddqv,,_f32,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svaddqv_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.addqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.addqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+float64x2_t test_svaddqv_f64(svbool_t pg, svfloat64_t op)
+{
+  return SVE_ACLE_FUNC(svaddqv,,_f64,)(pg, op);
+}
+
+
+// FMAXQV
+
+// CHECK-LABEL: @test_svmaxqv_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_svmaxqv_f16(svbool_t pg, svfloat16_t op)
+{
+  return SVE_ACLE_FUNC(svmaxqv,,_f16,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svmaxqv_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_svmaxqv_f32(svbool_t pg, svfloat32_t op)
+{
+  return SVE_ACLE_FUNC(svmaxqv,,_f32,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svmaxqv_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+float64x2_t test_svmaxqv_f64(svbool_t pg, svfloat64_t op)
+{
+  return SVE_ACLE_FUNC(svmaxqv,,_f64,)(pg, op);
+}
+
+
+// FMINQV
+
+// CHECK-LABEL: @test_svminqv_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_svminqv_f16(svbool_t pg, svfloat16_t op)
+{
+  return SVE_ACLE_FUNC(svminqv,,_f16,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svminqv_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_svminqv_f32(svbool_t pg, svfloat32_t op)
+{
+  return SVE_ACLE_FUNC(svminqv,,_f32,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svminqv_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+float64x2_t test_svminqv_f64(svbool_t pg, svfloat64_t op)
+{
+  return SVE_ACLE_FUNC(svminqv,,_f64,)(pg, op);
+}
+
+
+// FMAXNMQV
+
+// CHECK-LABEL: @test_svmaxnmqv_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svmaxnmqv_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fmaxnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_svmaxnmqv_f16(svbool_t pg, svfloat16_t op)
+{
+  return SVE_ACLE_FUNC(svmaxnmqv,,_f16,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svmaxnmqv_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svmaxnmqv_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_svmaxnmqv_f32(svbool_t pg, svfloat32_t op)
+{
+  return SVE_ACLE_FUNC(svmaxnmqv,,_f32,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svmaxnmqv_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svmaxnmqv_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fmaxnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+float64x2_t test_svmaxnmqv_f64(svbool_t pg, svfloat64_t op)
+{
+  return SVE_ACLE_FUNC(svmaxnmqv,,_f64,)(pg, op);
+}
+
+
+// FMINNMQV
+
+// CHECK-LABEL: @test_svminnmqv_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svminnmqv_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.aarch64.sve.fminnmqv.v8f16.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP1]]
+//
+float16x8_t test_svminnmqv_f16(svbool_t pg, svfloat16_t op)
+{
+  return SVE_ACLE_FUNC(svminnmqv,,_f16,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svminnmqv_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svminnmqv_f32u10__SVBool_tu13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP1]]
+//
+float32x4_t test_svminnmqv_f32(svbool_t pg, svfloat32_t op)
+{
+  return SVE_ACLE_FUNC(svminnmqv,,_f32,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svminnmqv_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svminnmqv_f64u10__SVBool_tu13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.aarch64.sve.fminnmqv.v2f64.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP1]]
+//
+float64x2_t test_svminnmqv_f64(svbool_t pg, svfloat64_t op)
+{
+  return SVE_ACLE_FUNC(svminnmqv,,_f64,)(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
new file mode 100644
index 000000000000000..d060339fe9a7fac
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
@@ -0,0 +1,784 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+// ADDQV
+
+// CHECK-LABEL: @test_svaddqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svaddqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svaddqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_svaddqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_svaddqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_svaddqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_s64,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svaddqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svaddqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_svaddqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_svaddqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svaddqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svaddqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_svaddqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(svaddqv,_u64,,)(pg, op1);
+}
+
+
+// ANDQV
+
+// CHECK-LABEL: @test_svandqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svandqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svandqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_svandqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_svandqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_svandqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_s64,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svandqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svandqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_svandqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_svandqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svandqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svandqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_svandqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(svandqv,_u64,,)(pg, op1);
+}
+
+
+// EORQV
+
+// CHECK-LABEL: @test_sveorqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_sveorqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_sveorqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_sveorqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_sveorqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_sveorqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_s64,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_sveorqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_sveorqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_sveorqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_sveorqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_sveorqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_sveorqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_sveorqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(sveorqv,_u64,,)(pg, op1);
+}
+
+
+// ORQV
+
+// CHECK-LABEL: @test_svorqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svorqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svorqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_svorqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_svorqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_svorqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_s64,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z14test_svorqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svorqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_svorqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_svorqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svorqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svorqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_svorqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(svorqv,_u64,,)(pg, op1);
+}
+
+
+// SMAXQV
+
+// CHECK-LABEL: @test_svmaxqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.smaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svmaxqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.smaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svmaxqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_svmaxqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.smaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.smaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_svmaxqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.smaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.smaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_svmaxqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_s64,,)(pg, op1);
+}
+
+
+// UMAXQV
+
+// CHECK-LABEL: @test_svmaxqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.umaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svmaxqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.umaxqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svmaxqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_svmaxqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.umaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.umaxqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_svmaxqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svmaxqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.umaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svmaxqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.umaxqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_svmaxqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(svmaxqv,_u64,,)(pg, op1);
+}
+
+
+// SMINQV
+
+// CHECK-LABEL: @test_svminqv_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.sminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svminqv_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.sminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svminqv_s8(svbool_t pg, svint8_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_s8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_svminqv_s16(svbool_t pg, svint16_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_s16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.sminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.sminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_svminqv_s32(svbool_t pg, svint32_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_s32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.sminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_s64u10__SVBool_tu11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.sminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_svminqv_s64(svbool_t pg, svint64_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_s64,,)(pg, op1);
+}
+
+
+// UMINQV
+
+// CHECK-LABEL: @test_svminqv_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.uminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z15test_svminqv_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.aarch64.sve.uminqv.v16i8.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svminqv_u8(svbool_t pg, svuint8_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_u8,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+//
+uint16x8_t test_svminqv_u16(svbool_t pg, svuint16_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_u16,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.uminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_u32u10__SVBool_tu12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.aarch64.sve.uminqv.v4i32.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+//
+uint32x4_t test_svminqv_u32(svbool_t pg, svuint32_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_u32,,)(pg, op1);
+}
+
+// CHECK-LABEL: @test_svminqv_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.uminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svminqv_u64u10__SVBool_tu12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.aarch64.sve.uminqv.v2i64.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]])
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_svminqv_u64(svbool_t pg, svuint64_t op1) {
+  return SVE_ACLE_FUNC(svminqv,_u64,,)(pg, op1);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index ab2b22233987a3c..3bfb66091a76dfd 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -91,6 +91,7 @@ class SVEType {
   bool isScalar() const { return NumVectors == 0; }
   bool isVector() const { return NumVectors > 0; }
   bool isScalableVector() const { return isVector() && IsScalable; }
+  bool isFixedLengthVector() const { return isVector() && !IsScalable; }
   bool isChar() const { return ElementBitwidth == 8; }
   bool isVoid() const { return Void & !Pointer; }
   bool isDefault() const { return DefaultType; }
@@ -449,7 +450,8 @@ std::string SVEType::builtin_str() const {
     return S;
   }
 
-  assert(isScalableVector() && "Unsupported type");
+  if (isFixedLengthVector())
+    return "V" + utostr(getNumElements() * NumVectors) + S;
   return "q" + utostr(getNumElements() * NumVectors) + S;
 }
 
@@ -466,6 +468,8 @@ std::string SVEType::str() const {
   else {
     if (isScalableVector() || isSvcount())
       S += "sv";
+    if (isFixedLengthVector())
+      S += "__sve_";
     if (!Signed && !isFloatingPoint())
       S += "u";
 
@@ -482,7 +486,7 @@ std::string SVEType::str() const {
 
     if (!isScalarPredicate() && !isPredicateVector() && !isSvcount())
       S += utostr(ElementBitwidth);
-    if (!isScalableVector() && isVector())
+    if (isFixedLengthVector())
       S += "x" + utostr(getNumElements());
     if (NumVectors > 1)
       S += "x" + utostr(NumVectors);
@@ -592,6 +596,11 @@ void SVEType::applyModifier(char Mod) {
     Bitwidth = 16;
     ElementBitwidth = 1;
     break;
+  case '{':
+    IsScalable = false;
+    Bitwidth = 128;
+    NumVectors = 1;
+    break;
   case 's':
   case 'a':
     Bitwidth = ElementBitwidth;
@@ -1224,7 +1233,27 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
 
   OS << "typedef __SVBFloat16_t svbfloat16_t;\n";
 
-  OS << "#include <arm_bf16.h>\n";
+  OS << "#include <arm_bf16.h>\n\n";
+
+  OS << "typedef __attribute__((vector_size (16))) int8_t __sve_int8x16_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) int16_t __sve_int16x8_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) int32_t __sve_int32x4_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) int64_t __sve_int64x2_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) uint8_t __sve_uint8x16_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) uint16_t "
+        "__sve_uint16x8_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) uint32_t "
+        "__sve_uint32x4_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) uint64_t "
+        "__sve_uint64x2_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) float16_t "
+        "__sve_float16x8_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) float32_t "
+        "__sve_float32x4_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) float64_t "
+        "__sve_float64x2_t;\n";
+  OS << "typedef __attribute__((vector_size (16))) bfloat16_t "
+        "__sve_bfloat16x8;\n";
 
   OS << "typedef __SVFloat32_t svfloat32_t;\n";
   OS << "typedef __SVFloat64_t svfloat64_t;\n";
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a42e2c49cb477ba..55015bd6fe9f600 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1401,6 +1401,13 @@ class AdvSIMD_SVE_Reduce_Intrinsic
                llvm_anyvector_ty],
               [IntrNoMem]>;
 
+class AdvSIMD_SVE_V128_Reduce_Intrinsic
+  : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+              [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+               llvm_anyvector_ty],
+               [IntrNoMem]>;
+
+
 class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
   : DefaultAttrsIntrinsic<[llvm_i64_ty],
               [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1686,6 +1693,15 @@ def int_aarch64_sve_sqsub_x   : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_uqadd_x   : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_uqsub_x   : AdvSIMD_2VectorArg_Intrinsic;
 
+def int_aarch64_sve_orqv      : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_eorqv     : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_andqv     : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_smaxqv    : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_umaxqv    : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_sminqv    : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_uminqv    : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+
+
 // Shifts
 
 def int_aarch64_sve_asr      : AdvSIMD_Pred2VectorArg_Intrinsic;
@@ -1996,6 +2012,11 @@ def int_aarch64_sve_fmaxv   : AdvSIMD_SVE_Reduce_Intrinsic;
 def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic;
 def int_aarch64_sve_fminv   : AdvSIMD_SVE_Reduce_Intrinsic;
 def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic;
+def int_aarch64_sve_addqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxnmqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_fminnmqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_fmaxqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
+def int_aarch64_sve_fminqv : AdvSIMD_SVE_V128_Reduce_Intrinsic;
 
 //
 // Floating-point conversions
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d599ac4689e5cb3..1cd84bc3d1c0edb 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3974,11 +3974,11 @@ def BFCLAMP_ZZZ : sve2p1_fclamp<"bfclamp", 0b00, ZPR16>;
 // SME2.1 or SVE2.1 instructions
 //===----------------------------------------------------------------------===//
 let Predicates = [HasSVE2p1_or_HasSME2p1] in {
-defm FADDQV   : sve2p1_fp_reduction_q<0b000, "faddqv">;
-defm FMAXNMQV : sve2p1_fp_reduction_q<0b100, "fmaxnmqv">;
-defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv">;
-defm FMAXQV   : sve2p1_fp_reduction_q<0b110, "fmaxqv">;
-defm FMINQV   : sve2p1_fp_reduction_q<0b111, "fminqv">;
+defm FADDQV   : sve2p1_fp_reduction_q<0b000, "faddqv", int_aarch64_sve_addqv>;
+defm FMAXNMQV : sve2p1_fp_reduction_q<0b100, "fmaxnmqv", int_aarch64_sve_fmaxnmqv>;
+defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmqv>;
+defm FMAXQV   : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
+defm FMINQV   : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
 
 defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
 def EXTQ_ZZI : sve2p1_extq<"extq">;
@@ -3986,14 +3986,14 @@ def EXTQ_ZZI : sve2p1_extq<"extq">;
 defm PMOV_PZI : sve2p1_vector_to_pred<"pmov">;
 defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov">;
 
-defm ORQV_VPZ   : sve2p1_int_reduce_q<0b1100, "orqv">;
-defm EORQV_VPZ  : sve2p1_int_reduce_q<0b1101, "eorqv">;
-defm ANDQV_VPZ  : sve2p1_int_reduce_q<0b1110, "andqv">;
-defm ADDQV_VPZ  : sve2p1_int_reduce_q<0b0001, "addqv">;
-defm SMAXQV_VPZ : sve2p1_int_reduce_q<0b0100, "smaxqv">;
-defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv">;
-defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv">;
-defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv">;
+defm ORQV_VPZ   : sve2p1_int_reduce_q<0b1100, "orqv", int_aarch64_sve_orqv>;
+defm EORQV_VPZ  : sve2p1_int_reduce_q<0b1101, "eorqv", int_aarch64_sve_eorqv>;
+defm ANDQV_VPZ  : sve2p1_int_reduce_q<0b1110, "andqv", int_aarch64_sve_andqv>;
+defm ADDQV_VPZ  : sve2p1_int_reduce_q<0b0001, "addqv", int_aarch64_sve_addqv>;
+defm SMAXQV_VPZ : sve2p1_int_reduce_q<0b0100, "smaxqv", int_aarch64_sve_smaxqv>;
+defm UMAXQV_VPZ : sve2p1_int_reduce_q<0b0101, "umaxqv", int_aarch64_sve_umaxqv>;
+defm SMINQV_VPZ : sve2p1_int_reduce_q<0b0110, "sminqv", int_aarch64_sve_sminqv>;
+defm UMINQV_VPZ : sve2p1_int_reduce_q<0b0111, "uminqv", int_aarch64_sve_uminqv>;
 
 defm TBXQ_ZZZ : sve2_int_perm_tbx<"tbxq", 0b10, null_frag>;
 defm ZIPQ1_ZZZ : sve2p1_permute_vec_elems_q<0b000, "zipq1">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7bb457d9188210c..f624fb2f78926d5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9867,10 +9867,14 @@ class sve2p1_fp_reduction_q<bits<2> sz, bits<3> opc, string mnemonic,
   let mayRaiseFPException = 1;
 }
 
-multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic> {
+multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic, SDPatternOperator op> {
   def _H : sve2p1_fp_reduction_q<0b01, opc, mnemonic, ZPR16, "8h">;
   def _S : sve2p1_fp_reduction_q<0b10, opc, mnemonic, ZPR32, "4s">;
   def _D : sve2p1_fp_reduction_q<0b11, opc, mnemonic, ZPR64, "2d">;
+
+  def : SVE_2_Op_Pat<v8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<v4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<v2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 
@@ -10036,11 +10040,16 @@ class sve2p1_int_reduce_q<bits<2> sz, bits<4> opc, string mnemonic,
   let hasSideEffects = 0;
 }
 
-multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic> {
+multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic, SDPatternOperator op> {
   def _B : sve2p1_int_reduce_q<0b00, opc, mnemonic, ZPR8,  "16b">;
   def _H : sve2p1_int_reduce_q<0b01, opc, mnemonic, ZPR16, "8h">;
   def _S : sve2p1_int_reduce_q<0b10, opc, mnemonic, ZPR32, "4s">;
   def _D : sve2p1_int_reduce_q<0b11, opc, mnemonic, ZPR64, "2d">;
+
+  def : SVE_2_Op_Pat<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fp-reduce.ll
new file mode 100644
index 000000000000000..7957366fce3b05a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fp-reduce.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sme2p1 < %s | FileCheck %s
+
+;
+; FMAXNMQV
+;
+
+define <8 x half> @fmaxnmqv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxnmqv_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x half> @llvm.aarch64.sve.fmaxnmqv.v8f16.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @fmaxnmqv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxnmqv_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmaxnmqv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxnmqv_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxnmqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x double> @llvm.aarch64.sve.fmaxnmqv.v2f64.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+;
+; FMINNMQV
+;
+
+define <8 x half> @fminnmqv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminnmqv_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x half> @llvm.aarch64.sve.fminnmqv.v8f16.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @fminnmqv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminnmqv_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fminnmqv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminnmqv_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminnmqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x double> @llvm.aarch64.sve.fminnmqv.v2f64.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+;
+; FADDQV
+;
+
+define <8 x half> @faddqv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: faddqv_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @faddqv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: faddqv_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.aarch64.sve.addqv.v4f32.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @faddqv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: faddqv_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x double> @llvm.aarch64.sve.addqv.v2f64.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+;
+; FMINQV
+;
+
+define <8 x half> @fminqv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminqv_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x half> @llvm.aarch64.sve.fminqv.v8f16.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @fminqv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminqv_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fminqv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminqv_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fminqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+;
+; FMAXQV
+;
+
+define <8 x half> @fmaxqv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxqv_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x half> @llvm.aarch64.sve.fmaxqv.v8f16.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x half> %a)
+  ret <8 x half> %res
+}
+
+define <4 x float> @fmaxqv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxqv_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmaxqv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxqv_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmaxqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+declare <8 x half> @llvm.aarch64.sve.fmaxnmqv.v8f16.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <2 x double> @llvm.aarch64.sve.fmaxnmqv.v2f64.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
+declare <8 x half> @llvm.aarch64.sve.fminnmqv.v8f16.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <2 x double> @llvm.aarch64.sve.fminnmqv.v2f64.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
+declare <8 x half> @llvm.aarch64.sve.addqv.v8f16.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <4 x float> @llvm.aarch64.sve.addqv.v4f32.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <2 x double> @llvm.aarch64.sve.addqv.v2f64.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
+declare <8 x half> @llvm.aarch64.sve.fminqv.v8f16.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <4 x float> @llvm.aarch64.sve.fminqv.v4f32.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
+declare <8 x half> @llvm.aarch64.sve.fmaxqv.v8f16.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare <4 x float> @llvm.aarch64.sve.fmaxqv.v4f32.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
+declare <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-int-reduce.ll
new file mode 100644
index 000000000000000..a730ba9c9320930
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-int-reduce.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sme2p1 < %s | FileCheck %s
+
+;
+; ORQV
+;
+
+define <16 x i8> @orqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: orqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @orqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: orqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @orqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: orqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @orqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: orqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; EORQV
+;
+
+define <16 x i8> @eorqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: eorqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @eorqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: eorqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @eorqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: eorqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @eorqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: eorqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eorqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; ANDQV
+;
+
+define <16 x i8> @andqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: andqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    andqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @andqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: andqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    andqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @andqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: andqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    andqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @andqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: andqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    andqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; ADDQV
+;
+
+define <16 x i8> @addqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: addqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @addqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: addqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @addqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: addqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @addqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: addqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; SMAXQV
+;
+
+define <16 x i8> @smaxqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: smaxqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.smaxqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @smaxqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: smaxqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @smaxqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: smaxqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.smaxqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @smaxqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: smaxqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smaxqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.smaxqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; UMAXQV
+;
+
+define <16 x i8> @umaxqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: umaxqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.umaxqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @umaxqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: umaxqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @umaxqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: umaxqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.umaxqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @umaxqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: umaxqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.umaxqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; SMINQV
+;
+
+define <16 x i8> @sminqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sminqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.sminqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @sminqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sminqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @sminqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sminqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.sminqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @sminqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sminqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sminqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.sminqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+;
+; UMINQV
+;
+
+define <16 x i8> @uminqv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uminqv_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminqv v0.16b, p0, z0.b
+; CHECK-NEXT:    ret
+  %res = call <16 x i8> @llvm.aarch64.sve.uminqv.v16i8.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @uminqv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uminqv_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminqv v0.8h, p0, z0.h
+; CHECK-NEXT:    ret
+  %res = call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @uminqv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uminqv_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminqv v0.4s, p0, z0.s
+; CHECK-NEXT:    ret
+  %res = call <4 x i32> @llvm.aarch64.sve.uminqv.v4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a);
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @uminqv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uminqv_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminqv v0.2d, p0, z0.d
+; CHECK-NEXT:    ret
+  %res = call <2 x i64> @llvm.aarch64.sve.uminqv.v2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a);
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.aarch64.sve.orqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.orqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.orqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.orqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.eorqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.eorqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.eorqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.eorqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.andqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.andqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.andqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.addqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.addqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.addqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.addqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.smaxqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.smaxqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.smaxqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.umaxqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.umaxqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.umaxqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.umaxqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.sminqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.sminqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.sminqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <16 x i8> @llvm.aarch64.sve.uminqv.v16i8.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <4 x i32> @llvm.aarch64.sve.uminqv.v4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <2 x i64> @llvm.aarch64.sve.uminqv.v2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)

>From 9b196351c7f4476cc96122aa3666ea1909d8bbe4 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Mon, 23 Oct 2023 12:52:48 +0000
Subject: [PATCH 2/6] [SVE2.1][Clang][LLVM]Int/FP reduce builtin in Clang and
 LLVM intrinsic

Add a new header for neon and sve

This patch implements the builtins in Clang
and the LLVM-IR intrinsic for the following:

// Variants are also available for:
// _s8, _s16, _u16, _s32, _u32, _s64, _u64,
// _f16, _f32, _f64uint8x16_t svaddqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64
uint8x16_t svandqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t sveorqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t svorqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for:
// _s8, _u16, _s16, _u32, _s32, _u64, _s64;
uint8x16_t svmaxqv[_u8](svbool_t pg, svuint8_t zn);
uint8x16_t svminqv[_u8](svbool_t pg, svuint8_t zn);

// Variants are also available for _f32, _f64
float16x8_t svmaxnmqv[_f16](svbool_t pg, svfloat16_t zn);
float16x8_t svminnmqv[_f16](svbool_t pg, svfloat16_t zn);

According to the PR#257[1]

The reduction instruction uses scalable vectors as input and fixed vectors
as output, therefore we changed SVEEmitter to emit fixed vector types in case
the neon header(arm_neon.h) is not present.

[1]https://github.com/ARM-software/acle/pull/257

Co-author by: Dinar Temirbulatov <dinar.temirbulatov at arm.com>
---
 clang/docs/tools/clang-formatted-files.txt    |  1 +
 clang/lib/Headers/CMakeLists.txt              |  1 +
 clang/lib/Headers/arm_neon_types.h            | 60 +++++++++++++++++++
 .../acle_sve2p1_int_reduce.c                  |  1 -
 clang/utils/TableGen/NeonEmitter.cpp          | 27 ++-------
 clang/utils/TableGen/SveEmitter.cpp           | 23 +------
 6 files changed, 69 insertions(+), 44 deletions(-)
 create mode 100644 clang/lib/Headers/arm_neon_types.h

diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 48cd800bffd0046..79c0993ae8b6a70 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -478,6 +478,7 @@ clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp
 clang/lib/Frontend/SerializedDiagnosticReader.cpp
 clang/lib/Headers/amxintrin.h
 clang/lib/Headers/arm_neon_sve_bridge.h
+clang/lib/Headers/arm_neon_types.h
 clang/lib/Headers/avx512fp16intrin.h
 clang/lib/Headers/avx512vlfp16intrin.h
 clang/lib/Headers/builtins.h
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 02a0c81644b6c6d..aff3209faa8fc72 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -49,6 +49,7 @@ set(arm_only_files
 set(aarch64_only_files
   arm64intr.h
   arm_neon_sve_bridge.h
+  arm_neon_types.h
   )
 
 set(cuda_files
diff --git a/clang/lib/Headers/arm_neon_types.h b/clang/lib/Headers/arm_neon_types.h
new file mode 100644
index 000000000000000..626a01e31116a5f
--- /dev/null
+++ b/clang/lib/Headers/arm_neon_types.h
@@ -0,0 +1,60 @@
+/*===---- arm_neon_types.h - ARM NEON TYPES --------------------------------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __ARM_NEON_TYPES_H
+#define __ARM_NEON_TYPES_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifndef __ARM_NEON_H
+typedef __attribute__((vector_size(16))) int8_t int8x16_t;
+typedef __attribute__((vector_size(16))) int16_t int16x8_t;
+typedef __attribute__((vector_size(16))) int32_t int32x4_t;
+typedef __attribute__((vector_size(16))) int64_t int64x2_t;
+typedef __attribute__((vector_size(16))) uint8_t uint8x16_t;
+typedef __attribute__((vector_size(16))) uint16_t uint16x8_t;
+typedef __attribute__((vector_size(16))) uint32_t uint32x4_t;
+typedef __attribute__((vector_size(16))) uint64_t uint64x2_t;
+typedef __attribute__((vector_size(16))) float16_t float16x8_t;
+typedef __attribute__((vector_size(16))) float32_t float32x4_t;
+typedef __attribute__((vector_size(16))) float64_t float64x2_t;
+#else
+typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
+typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
+typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
+typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
+typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
+typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
+typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
+typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
+typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
+typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
+#ifdef __aarch64__
+typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
+#endif
+typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
+typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
+typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
+typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
+typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
+typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
+typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
+typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
+typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
+typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
+#ifdef __aarch64__
+typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
+#endif
+typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
+typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
+#endif
+#ifdef __cplusplus
+} // extern "C"
+#endif
+#endif //__ARM_NEON_TYPES_H
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
index d060339fe9a7fac..b395b4d1323ed5e 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_int_reduce.c
@@ -5,7 +5,6 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-#include <arm_neon.h>
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 3c891dbe9d7aa0f..eb606408721ef1e 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2233,34 +2233,18 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
   // Emit vector typedefs.
   bool InIfdef = false;
   for (auto &TS : TDTypeVec) {
-    bool IsA64 = false;
     Type T(TS, ".");
-    if (T.isDouble())
-      IsA64 = true;
 
-    if (InIfdef && !IsA64) {
-      OS << "#endif\n";
-      InIfdef = false;
-    }
-    if (!InIfdef && IsA64) {
-      OS << "#ifdef __aarch64__\n";
-      InIfdef = true;
-    }
-
-    if (T.isPoly())
-      OS << "typedef __attribute__((neon_polyvector_type(";
-    else
-      OS << "typedef __attribute__((neon_vector_type(";
+    if (!T.isPoly())
+      continue;
 
+    OS << "typedef __attribute__((neon_polyvector_type(";
     Type T2 = T;
     T2.makeScalar();
     OS << T.getNumElements() << "))) ";
     OS << T2.str();
     OS << " " << T.str() << ";\n";
   }
-  if (InIfdef)
-    OS << "#endif\n";
-  OS << "\n";
 
   // Emit struct typedefs.
   InIfdef = false;
@@ -2374,9 +2358,10 @@ void NeonEmitter::run(raw_ostream &OS) {
   OS << "typedef int16_t poly16_t;\n";
   OS << "typedef int64_t poly64_t;\n";
   OS << "#endif\n";
+  OS << "#include <arm_neon_types.h>\n";
 
-  emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
-
+  emitNeonTypeDefs("csilUcUsUiUlhfdPcQPcPsQPsPlQPlQcQsQiQlQUcQUsQUiQUlQhQfQd",
+                   OS);
   emitNeonTypeDefs("bQb", OS);
 
   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 3bfb66091a76dfd..b27579b19fff45d 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -468,8 +468,6 @@ std::string SVEType::str() const {
   else {
     if (isScalableVector() || isSvcount())
       S += "sv";
-    if (isFixedLengthVector())
-      S += "__sve_";
     if (!Signed && !isFloatingPoint())
       S += "u";
 
@@ -1234,26 +1232,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
   OS << "typedef __SVBFloat16_t svbfloat16_t;\n";
 
   OS << "#include <arm_bf16.h>\n\n";
-
-  OS << "typedef __attribute__((vector_size (16))) int8_t __sve_int8x16_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) int16_t __sve_int16x8_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) int32_t __sve_int32x4_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) int64_t __sve_int64x2_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) uint8_t __sve_uint8x16_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) uint16_t "
-        "__sve_uint16x8_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) uint32_t "
-        "__sve_uint32x4_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) uint64_t "
-        "__sve_uint64x2_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) float16_t "
-        "__sve_float16x8_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) float32_t "
-        "__sve_float32x4_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) float64_t "
-        "__sve_float64x2_t;\n";
-  OS << "typedef __attribute__((vector_size (16))) bfloat16_t "
-        "__sve_bfloat16x8;\n";
+  OS << "#include <arm_neon_types.h>\n";
 
   OS << "typedef __SVFloat32_t svfloat32_t;\n";
   OS << "typedef __SVFloat64_t svfloat64_t;\n";

>From e8ec7c8fa06c29ad85c7a6bd6b2fba44c3304aac Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Wed, 22 Nov 2023 10:03:50 +0000
Subject: [PATCH 3/6] [Clang][AArch64] Add  fix vector types to header into SVE

This patch is needed for the reduction instructions in sve2.1

It add ta new header to sve with all the fixed vector types.
The new types are only added if neon is not declared.
---
 clang/include/clang/Basic/arm_vector_type.td  |  13 ++
 clang/lib/Headers/CMakeLists.txt              |   3 +
 .../CodeGen/arm-vector_type-params-returns.c  | 113 ++++++++++++++++++
 clang/utils/TableGen/NeonEmitter.cpp          |  44 +++++++
 clang/utils/TableGen/SveEmitter.cpp           |   5 +-
 clang/utils/TableGen/TableGen.cpp             |  15 ++-
 clang/utils/TableGen/TableGenBackends.h       |   1 +
 7 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 clang/include/clang/Basic/arm_vector_type.td
 create mode 100644 clang/test/CodeGen/arm-vector_type-params-returns.c

diff --git a/clang/include/clang/Basic/arm_vector_type.td b/clang/include/clang/Basic/arm_vector_type.td
new file mode 100644
index 000000000000000..5018b0cdfc13785
--- /dev/null
+++ b/clang/include/clang/Basic/arm_vector_type.td
@@ -0,0 +1,13 @@
+//===--- arm_vector_type.td - ARM Fixed vector types compiler interface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the TableGen definitions from which the ARM BF16 header
+//  file will be generated.
+//
+//===----------------------------------------------------------------------===//
+include "arm_neon_incl.td"
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index aff3209faa8fc72..8e4c59eff90da30 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -384,6 +384,8 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
   clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
   # Generate arm_cde.h
   clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
+  # Generate arm_vector_type.h
+  clang_generate_header(-gen-arm-vector-type arm_vector_type.td arm_vector_type.h)
 
   # Add headers to target specific lists
   list(APPEND arm_common_generated_files
@@ -400,6 +402,7 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
     "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h"
     "${CMAKE_CURRENT_BINARY_DIR}/arm_sme_draft_spec_subject_to_change.h"
     "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h"
+    "${CMAKE_CURRENT_BINARY_DIR}/arm_vector_type.h"
     )
 endif()
 if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD)
diff --git a/clang/test/CodeGen/arm-vector_type-params-returns.c b/clang/test/CodeGen/arm-vector_type-params-returns.c
new file mode 100644
index 000000000000000..48c19d01b6257cc
--- /dev/null
+++ b/clang/test/CodeGen/arm-vector_type-params-returns.c
@@ -0,0 +1,113 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
+#include <arm_sve.h>
+
+// function return types
+// CHECK-LABEL: define dso_local <8 x half> @test_ret_v8f16(
+// CHECK-SAME: <8 x half> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x half> [[V]]
+//
+float16x8_t test_ret_v8f16(float16x8_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_ret_v4f32(
+// CHECK-SAME: <4 x float> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x float> [[V]]
+//
+float32x4_t test_ret_v4f32(float32x4_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <2 x double> @test_ret_v2f64(
+// CHECK-SAME: <2 x double> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x double> [[V]]
+//
+float64x2_t test_ret_v2f64(float64x2_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <8 x bfloat> @test_ret_v8bf16(
+// CHECK-SAME: <8 x bfloat> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x bfloat> [[V]]
+//
+bfloat16x8_t test_ret_v8bf16(bfloat16x8_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_ret_v16s8(
+// CHECK-SAME: <16 x i8> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> [[V]]
+//
+int8x16_t test_ret_v16s8(int8x16_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_ret_v8s16(
+// CHECK-SAME: <8 x i16> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> [[V]]
+//
+int16x8_t test_ret_v8s16(int16x8_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_ret_v32s4(
+// CHECK-SAME: <4 x i32> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> [[V]]
+//
+int32x4_t test_ret_v32s4(int32x4_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <2 x i64> @test_ret_v64s2(
+// CHECK-SAME: <2 x i64> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> [[V]]
+//
+int64x2_t test_ret_v64s2(int64x2_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_ret_v16u8(
+// CHECK-SAME: <16 x i8> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> [[V]]
+//
+uint8x16_t test_ret_v16u8(uint8x16_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <8 x i16> @test_ret_v8u16(
+// CHECK-SAME: <8 x i16> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> [[V]]
+//
+uint16x8_t test_ret_v8u16(uint16x8_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <4 x i32> @test_ret_v32u4(
+// CHECK-SAME: <4 x i32> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> [[V]]
+//
+uint32x4_t test_ret_v32u4(uint32x4_t v) {
+  return v;
+}
+
+// CHECK-LABEL: define dso_local <2 x i64> @test_ret_v64u2(
+// CHECK-SAME: <2 x i64> noundef returned [[V:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> [[V]]
+//
+uint64x2_t test_ret_v64u2(uint64x2_t v) {
+  return v;
+}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index eb606408721ef1e..ba05bae8e9d5ec2 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -593,6 +593,8 @@ class NeonEmitter {
   // Emit arm_bf16.h.inc
   void runBF16(raw_ostream &o);
 
+  void runVectorType(raw_ostream &o);
+
   // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
   // arm_bf16.h
   void runHeader(raw_ostream &o);
@@ -2531,6 +2533,44 @@ void NeonEmitter::runFP16(raw_ostream &OS) {
   OS << "#endif /* __ARM_FP16_H */\n";
 }
 
+void NeonEmitter::runVectorType(raw_ostream &OS) {
+  OS << "/*===---- arm_vector_type - ARM vector type "
+        "------===\n"
+        " *\n"
+        " *\n"
+        " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
+        "Exceptions.\n"
+        " * See https://llvm.org/LICENSE.txt for license information.\n"
+        " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
+        " *\n"
+        " *===-----------------------------------------------------------------"
+        "------===\n"
+        " */\n\n";
+  OS << "#ifndef __ARM_NEON_TYPES_H\n";
+  OS << "#define __ARM_NEON_TYPES_H\n";
+  OS << "#ifdef __cplusplus\n";
+  OS << "extern \"C\" {\n";
+  OS << "#endif\n";
+  OS << "#ifndef __ARM_NEON_H\n";
+
+  std::string TypedefTypes("QcQsQiQlQUcQUsQUiQUlQhQfQdQb");
+  std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+  for (auto &TS : TDTypeVec) {
+    Type T(TS, ".");
+    OS << "typedef __attribute__((vector_size(16))) ";
+
+    Type T2 = T;
+    T2.makeScalar();
+    OS << T2.str();
+    OS << " " << T.str() << ";\n";
+  }
+  OS << "#endif\n";
+  OS << "#ifdef __cplusplus\n";
+  OS << "} // extern \"C\"\n";
+  OS << "#endif\n";
+  OS << "#endif //__ARM_NEON_TYPES_H\n";
+}
+
 void NeonEmitter::runBF16(raw_ostream &OS) {
   OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
         "-----------------------------------===\n"
@@ -2625,6 +2665,10 @@ void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   NeonEmitter(Records).runHeader(OS);
 }
 
+void clang::EmitVectorType(RecordKeeper &Records, raw_ostream &OS) {
+  NeonEmitter(Records).runVectorType(OS);
+}
+
 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   llvm_unreachable("Neon test generation no longer implemented!");
 }
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b27579b19fff45d..1ca4273dc30dee7 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1231,8 +1231,8 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
 
   OS << "typedef __SVBFloat16_t svbfloat16_t;\n";
 
-  OS << "#include <arm_bf16.h>\n\n";
-  OS << "#include <arm_neon_types.h>\n";
+  OS << "#include <arm_bf16.h>\n";
+  OS << "#include <arm_vector_type.h>\n";
 
   OS << "typedef __SVFloat32_t svfloat32_t;\n";
   OS << "typedef __SVFloat64_t svfloat64_t;\n";
@@ -1705,4 +1705,5 @@ void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
 void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
   SVEEmitter(Records).createSMERangeChecks(OS);
 }
+
 } // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 7efb6c731d3e5ee..66008ae0c2e3c14 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -73,6 +73,7 @@ enum ActionType {
   GenArmNeon,
   GenArmFP16,
   GenArmBF16,
+  GenArmVectorType,
   GenArmNeonSema,
   GenArmNeonTest,
   GenArmMveHeader,
@@ -229,6 +230,8 @@ cl::opt<ActionType> Action(
         clEnumValN(GenArmNeon, "gen-arm-neon", "Generate arm_neon.h for clang"),
         clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
         clEnumValN(GenArmBF16, "gen-arm-bf16", "Generate arm_bf16.h for clang"),
+        clEnumValN(GenArmVectorType, "gen-arm-vector-type",
+                   "Generate arm_vector_type.h for clang"),
         clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
@@ -279,11 +282,14 @@ cl::opt<ActionType> Action(
                    "Generate riscv_vector_builtin_cg.inc for clang"),
         clEnumValN(GenRISCVVectorBuiltinSema, "gen-riscv-vector-builtin-sema",
                    "Generate riscv_vector_builtin_sema.inc for clang"),
-        clEnumValN(GenRISCVSiFiveVectorBuiltins, "gen-riscv-sifive-vector-builtins",
+        clEnumValN(GenRISCVSiFiveVectorBuiltins,
+                   "gen-riscv-sifive-vector-builtins",
                    "Generate riscv_sifive_vector_builtins.inc for clang"),
-        clEnumValN(GenRISCVSiFiveVectorBuiltinCG, "gen-riscv-sifive-vector-builtin-codegen",
+        clEnumValN(GenRISCVSiFiveVectorBuiltinCG,
+                   "gen-riscv-sifive-vector-builtin-codegen",
                    "Generate riscv_sifive_vector_builtin_cg.inc for clang"),
-        clEnumValN(GenRISCVSiFiveVectorBuiltinSema, "gen-riscv-sifive-vector-builtin-sema",
+        clEnumValN(GenRISCVSiFiveVectorBuiltinSema,
+                   "gen-riscv-sifive-vector-builtin-sema",
                    "Generate riscv_sifive_vector_builtin_sema.inc for clang"),
         clEnumValN(GenAttrDocs, "gen-attr-docs",
                    "Generate attribute documentation"),
@@ -449,6 +455,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenArmFP16:
     EmitFP16(Records, OS);
     break;
+  case GenArmVectorType:
+    EmitVectorType(Records, OS);
+    break;
   case GenArmBF16:
     EmitBF16(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index d8f447069376bca..b5bf0b56043a8b7 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -97,6 +97,7 @@ void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitBF16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitNeonSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitVectorType(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitNeonTest(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
 void EmitSveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);

>From 436743700c92fc301a0d7df8b92d8a03cd28eecb Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 24 Nov 2023 12:06:56 +0000
Subject: [PATCH 4/6] Remove arm_neon_types.h

---
 clang/docs/tools/clang-formatted-files.txt |  1 -
 clang/lib/Headers/CMakeLists.txt           |  1 -
 clang/lib/Headers/arm_neon_types.h         | 60 ----------------------
 clang/utils/TableGen/NeonEmitter.cpp       | 25 +++++++--
 4 files changed, 20 insertions(+), 67 deletions(-)
 delete mode 100644 clang/lib/Headers/arm_neon_types.h

diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt
index 79c0993ae8b6a70..48cd800bffd0046 100644
--- a/clang/docs/tools/clang-formatted-files.txt
+++ b/clang/docs/tools/clang-formatted-files.txt
@@ -478,7 +478,6 @@ clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp
 clang/lib/Frontend/SerializedDiagnosticReader.cpp
 clang/lib/Headers/amxintrin.h
 clang/lib/Headers/arm_neon_sve_bridge.h
-clang/lib/Headers/arm_neon_types.h
 clang/lib/Headers/avx512fp16intrin.h
 clang/lib/Headers/avx512vlfp16intrin.h
 clang/lib/Headers/builtins.h
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 8e4c59eff90da30..4dc1ca500dbe9dc 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -49,7 +49,6 @@ set(arm_only_files
 set(aarch64_only_files
   arm64intr.h
   arm_neon_sve_bridge.h
-  arm_neon_types.h
   )
 
 set(cuda_files
diff --git a/clang/lib/Headers/arm_neon_types.h b/clang/lib/Headers/arm_neon_types.h
deleted file mode 100644
index 626a01e31116a5f..000000000000000
--- a/clang/lib/Headers/arm_neon_types.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*===---- arm_neon_types.h - ARM NEON TYPES --------------------------------===
- *
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __ARM_NEON_TYPES_H
-#define __ARM_NEON_TYPES_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifndef __ARM_NEON_H
-typedef __attribute__((vector_size(16))) int8_t int8x16_t;
-typedef __attribute__((vector_size(16))) int16_t int16x8_t;
-typedef __attribute__((vector_size(16))) int32_t int32x4_t;
-typedef __attribute__((vector_size(16))) int64_t int64x2_t;
-typedef __attribute__((vector_size(16))) uint8_t uint8x16_t;
-typedef __attribute__((vector_size(16))) uint16_t uint16x8_t;
-typedef __attribute__((vector_size(16))) uint32_t uint32x4_t;
-typedef __attribute__((vector_size(16))) uint64_t uint64x2_t;
-typedef __attribute__((vector_size(16))) float16_t float16x8_t;
-typedef __attribute__((vector_size(16))) float32_t float32x4_t;
-typedef __attribute__((vector_size(16))) float64_t float64x2_t;
-#else
-typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
-typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
-typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
-typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
-typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
-typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
-typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
-typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
-typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
-typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
-#ifdef __aarch64__
-typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
-#endif
-typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
-typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
-typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
-typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
-typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
-typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
-typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
-typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
-typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
-typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
-#ifdef __aarch64__
-typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
-#endif
-typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t;
-typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t;
-#endif
-#ifdef __cplusplus
-} // extern "C"
-#endif
-#endif //__ARM_NEON_TYPES_H
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index ba05bae8e9d5ec2..03ed20d848ada92 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2235,18 +2235,33 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
   // Emit vector typedefs.
   bool InIfdef = false;
   for (auto &TS : TDTypeVec) {
+    bool IsA64 = false;
     Type T(TS, ".");
+ if (T.isDouble())
+      IsA64 = true;
 
-    if (!T.isPoly())
-      continue;
+    if (InIfdef && !IsA64) {
+      OS << "#endif\n";
+      InIfdef = false;
+    }
+    if (!InIfdef && IsA64) {
+      OS << "#ifdef __aarch64__\n";
+      InIfdef = true;
+    }
 
-    OS << "typedef __attribute__((neon_polyvector_type(";
+    if (T.isPoly())
+      OS << "typedef __attribute__((neon_polyvector_type(";
+    else
+      OS << "typedef __attribute__((neon_vector_type(";
     Type T2 = T;
     T2.makeScalar();
     OS << T.getNumElements() << "))) ";
     OS << T2.str();
     OS << " " << T.str() << ";\n";
   }
+  if (InIfdef)
+    OS << "#endif\n";
+  OS << "\n";
 
   // Emit struct typedefs.
   InIfdef = false;
@@ -2362,8 +2377,8 @@ void NeonEmitter::run(raw_ostream &OS) {
   OS << "#endif\n";
   OS << "#include <arm_neon_types.h>\n";
 
-  emitNeonTypeDefs("csilUcUsUiUlhfdPcQPcPsQPsPlQPlQcQsQiQlQUcQUsQUiQUlQhQfQd",
-                   OS);
+  emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
+
   emitNeonTypeDefs("bQb", OS);
 
   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "

>From 32133d9f3413208a907b436e10aff61640482b9d Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Fri, 24 Nov 2023 14:22:01 +0000
Subject: [PATCH 5/6] Fix missing arm_neon_types.h

Remove arm_neon_types.h from NeonEmmiter
---
 clang/utils/TableGen/NeonEmitter.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 03ed20d848ada92..1f5deaad176f348 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2375,7 +2375,6 @@ void NeonEmitter::run(raw_ostream &OS) {
   OS << "typedef int16_t poly16_t;\n";
   OS << "typedef int64_t poly64_t;\n";
   OS << "#endif\n";
-  OS << "#include <arm_neon_types.h>\n";
 
   emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
 

>From c15c8ba81c9d4167daccdcc99ca844b55c77c6e8 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Thu, 30 Nov 2023 18:40:35 +0000
Subject: [PATCH 6/6] Fix header arm_vector_type to work with any header
 combination

Now any combination between arm_neon.h and arm_sve.h should work
---
 ...arm_vector_type.td => arm_vector_types.td} |  2 +-
 clang/lib/Headers/CMakeLists.txt              |  6 +--
 .../CodeGen/arm-vector_type-params-returns.c  | 27 +++++++++++--
 clang/utils/TableGen/NeonEmitter.cpp          | 38 +++++++++++++------
 clang/utils/TableGen/SveEmitter.cpp           |  2 +-
 clang/utils/TableGen/TableGen.cpp             |  2 +-
 6 files changed, 57 insertions(+), 20 deletions(-)
 rename clang/include/clang/Basic/{arm_vector_type.td => arm_vector_types.td} (86%)

diff --git a/clang/include/clang/Basic/arm_vector_type.td b/clang/include/clang/Basic/arm_vector_types.td
similarity index 86%
rename from clang/include/clang/Basic/arm_vector_type.td
rename to clang/include/clang/Basic/arm_vector_types.td
index 5018b0cdfc13785..7b43b62f9e066c7 100644
--- a/clang/include/clang/Basic/arm_vector_type.td
+++ b/clang/include/clang/Basic/arm_vector_types.td
@@ -1,4 +1,4 @@
-//===--- arm_vector_type.td - ARM Fixed vector types compiler interface ---===//
+//===--- arm_vector_types.td - ARM Fixed vector types compiler interface ---===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 4dc1ca500dbe9dc..fbf2ad464f7fa7b 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -383,8 +383,8 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
   clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
   # Generate arm_cde.h
   clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
-  # Generate arm_vector_type.h
-  clang_generate_header(-gen-arm-vector-type arm_vector_type.td arm_vector_type.h)
+  # Generate arm_vector_types.h
+  clang_generate_header(-gen-arm-vector-type arm_vector_types.td arm_vector_types.h)
 
   # Add headers to target specific lists
   list(APPEND arm_common_generated_files
@@ -401,7 +401,7 @@ if(ARM IN_LIST LLVM_TARGETS_TO_BUILD OR AArch64 IN_LIST LLVM_TARGETS_TO_BUILD)
     "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h"
     "${CMAKE_CURRENT_BINARY_DIR}/arm_sme_draft_spec_subject_to_change.h"
     "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h"
-    "${CMAKE_CURRENT_BINARY_DIR}/arm_vector_type.h"
+    "${CMAKE_CURRENT_BINARY_DIR}/arm_vector_types.h"
     )
 endif()
 if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD)
diff --git a/clang/test/CodeGen/arm-vector_type-params-returns.c b/clang/test/CodeGen/arm-vector_type-params-returns.c
index 48c19d01b6257cc..61b617083515a7c 100644
--- a/clang/test/CodeGen/arm-vector_type-params-returns.c
+++ b/clang/test/CodeGen/arm-vector_type-params-returns.c
@@ -1,7 +1,28 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
-// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
-#include <arm_sve.h>
+
+// RUN: %clang_cc1 -DSVE_HEADER -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -DSVE_HEADER -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
+
+// RUN: %clang_cc1 -DNEON_HEADER -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -DNEON_HEADER -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
+
+// RUN: %clang_cc1 -DSVE_HEADER -DNEON_HEADER -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -DSVE_HEADER -DNEON_HEADER -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
+
+// RUN: %clang_cc1 -DNEON_HEADER -DSVE_HEADER2  -triple aarch64 -target-feature +sve -emit-llvm -O2 -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -DNEON_HEADER -DSVE_HEADER2 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o - /dev/null %s
+
+#ifdef SVE_HEADER
+  #include <arm_sve.h>
+#endif
+
+#ifdef NEON_HEADER
+  #include <arm_neon.h>
+#endif
+
+#ifdef SVE_HEADER_2
+  #include <arm_sve.h>
+#endif
 
 // function return types
 // CHECK-LABEL: define dso_local <8 x half> @test_ret_v8f16(
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 1f5deaad176f348..fd9afa3118d927e 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2231,6 +2231,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) {
 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
   std::string TypedefTypes(types);
   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
+  // arm_sve.h followed by arm_neon.h does not emmit these types
+  // because only arm_sve.h defines __ARM_NEON_TYPES_H
+  // arm_neon.h followed by arm_sve.h emmit these types
+  // because __ARM_NEON_TYPES_H is not defined
+  // Avoids to redeclare the types in arm_neon.h
+  OS << "#ifndef __ARM_NEON_TYPES_H\n";
 
   // Emit vector typedefs.
   bool InIfdef = false;
@@ -2262,6 +2268,7 @@ static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
   if (InIfdef)
     OS << "#endif\n";
   OS << "\n";
+  OS << "#endif // __ARM_NEON_TYPES_H\n";
 
   // Emit struct typedefs.
   InIfdef = false;
@@ -2548,7 +2555,7 @@ void NeonEmitter::runFP16(raw_ostream &OS) {
 }
 
 void NeonEmitter::runVectorType(raw_ostream &OS) {
-  OS << "/*===---- arm_vector_type - ARM vector type "
+  OS << "/*===---- arm_vector_types - ARM vector type "
         "------===\n"
         " *\n"
         " *\n"
@@ -2560,29 +2567,38 @@ void NeonEmitter::runVectorType(raw_ostream &OS) {
         " *===-----------------------------------------------------------------"
         "------===\n"
         " */\n\n";
+  OS << "#ifndef __ARM_NEON_H\n\n";
   OS << "#ifndef __ARM_NEON_TYPES_H\n";
   OS << "#define __ARM_NEON_TYPES_H\n";
-  OS << "#ifdef __cplusplus\n";
-  OS << "extern \"C\" {\n";
+  OS << "#ifdef __aarch64__\n";
+  OS << "typedef uint8_t poly8_t;\n";
+  OS << "typedef uint16_t poly16_t;\n";
+  OS << "typedef uint64_t poly64_t;\n";
+  OS << "typedef __uint128_t poly128_t;\n";
+  OS << "#else\n";
+  OS << "typedef int8_t poly8_t;\n";
+  OS << "typedef int16_t poly16_t;\n";
   OS << "#endif\n";
-  OS << "#ifndef __ARM_NEON_H\n";
 
-  std::string TypedefTypes("QcQsQiQlQUcQUsQUiQUlQhQfQdQb");
+  // Needs to declare all the types in case there is arm_sve.h followed by
+  // arm_neon.h.
+  // arm_sve defines __ARM_NEON_TYPES_H so it avoids to declare again the
+  // types in arm_neon.h
+  std::string TypedefTypes(
+      "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPlbQb");
   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
   for (auto &TS : TDTypeVec) {
     Type T(TS, ".");
-    OS << "typedef __attribute__((vector_size(16))) ";
+    OS << "typedef __attribute__((vector_size(";
 
+    OS << T.getSizeInBits() / 8 << ")))";
     Type T2 = T;
     T2.makeScalar();
     OS << T2.str();
     OS << " " << T.str() << ";\n";
   }
-  OS << "#endif\n";
-  OS << "#ifdef __cplusplus\n";
-  OS << "} // extern \"C\"\n";
-  OS << "#endif\n";
-  OS << "#endif //__ARM_NEON_TYPES_H\n";
+  OS << "#endif // __ARM_NEON_TYPES_H\n";
+  OS << "#endif // __ARM_NEON_H\n";
 }
 
 void NeonEmitter::runBF16(raw_ostream &OS) {
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 1ca4273dc30dee7..402718325c1066d 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1232,7 +1232,7 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
   OS << "typedef __SVBFloat16_t svbfloat16_t;\n";
 
   OS << "#include <arm_bf16.h>\n";
-  OS << "#include <arm_vector_type.h>\n";
+  OS << "#include <arm_vector_types.h>\n";
 
   OS << "typedef __SVFloat32_t svfloat32_t;\n";
   OS << "typedef __SVFloat64_t svfloat64_t;\n";
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 66008ae0c2e3c14..8a183531a61d7db 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -231,7 +231,7 @@ cl::opt<ActionType> Action(
         clEnumValN(GenArmFP16, "gen-arm-fp16", "Generate arm_fp16.h for clang"),
         clEnumValN(GenArmBF16, "gen-arm-bf16", "Generate arm_bf16.h for clang"),
         clEnumValN(GenArmVectorType, "gen-arm-vector-type",
-                   "Generate arm_vector_type.h for clang"),
+                   "Generate arm_vector_types.h for clang"),
         clEnumValN(GenArmNeonSema, "gen-arm-neon-sema",
                    "Generate ARM NEON sema support for clang"),
         clEnumValN(GenArmNeonTest, "gen-arm-neon-test",



More information about the cfe-commits mailing list