[llvm] 54744bc - [LLVM][AArch64] Add "u" variants of sve.[s,u]hsub intrinsics (#170894)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 03:22:40 PST 2025
Author: Paul Walker
Date: 2025-12-12T11:22:34Z
New Revision: 54744bc0a608729cd09ca9c2d03474aa2be727b8
URL: https://github.com/llvm/llvm-project/commit/54744bc0a608729cd09ca9c2d03474aa2be727b8
DIFF: https://github.com/llvm/llvm-project/commit/54744bc0a608729cd09ca9c2d03474aa2be727b8.diff
LOG: [LLVM][AArch64] Add "u" variants of sve.[s,u]hsub intrinsics (#170894)
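The "u" variants behave like the existing shsub/uhsub intrinsics except that
lanes corresponding to inactive predicate elements produce an undefined value,
following the convention already used for sqsub_u/uqsub_u. This gives the
don't-care (_x) ACLE builtins and later combines a form that places no
requirement on inactive lanes. A minimal IR sketch (the signature is taken
from the updated tests below; the per-lane semantics note is an informal
summary of the halving-subtract behaviour):

    ; Halving subtract: each active lane yields (op1 - op2) >> 1, computed
    ; without intermediate overflow; inactive lanes are undefined.
    declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(
        <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

    define <vscale x 16 x i8> @hsub_example(<vscale x 16 x i1> %pg,
                                            <vscale x 16 x i8> %op1,
                                            <vscale x 16 x i8> %op2) {
      %r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(
               <vscale x 16 x i1> %pg, <vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
      ret <vscale x 16 x i8> %r
    }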
Added:
Modified:
clang/include/clang/Basic/arm_sve.td
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64SchedA320.td
llvm/lib/Target/AArch64/AArch64SchedA510.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index ed41b43162109..a4a0ee7ed5f5f 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1292,10 +1292,10 @@ defm SVQSUB_S : SInstZPZZ<"svqsub", "csli", "aarch64_sve_sqsub", "aarch64
defm SVQSUB_U : SInstZPZZ<"svqsub", "UcUsUiUl", "aarch64_sve_uqsub", "aarch64_sve_uqsub_u">;
defm SVQSUBR_S : SInstZPZZ<"svqsubr", "csli", "aarch64_sve_sqsubr", "aarch64_sve_sqsub_u", [ReverseMergeAnyBinOp]>;
defm SVQSUBR_U : SInstZPZZ<"svqsubr", "UcUsUiUl", "aarch64_sve_uqsubr", "aarch64_sve_uqsub_u", [ReverseMergeAnyBinOp]>;
-defm SVHSUB_S : SInstZPZZ<"svhsub", "csli", "aarch64_sve_shsub", "aarch64_sve_shsub">;
-defm SVHSUB_U : SInstZPZZ<"svhsub", "UcUsUiUl", "aarch64_sve_uhsub", "aarch64_sve_uhsub">;
-defm SVHSUBR_S : SInstZPZZ<"svhsubr", "csli", "aarch64_sve_shsubr", "aarch64_sve_shsubr">;
-defm SVHSUBR_U : SInstZPZZ<"svhsubr", "UcUsUiUl", "aarch64_sve_uhsubr", "aarch64_sve_uhsubr">;
+defm SVHSUB_S : SInstZPZZ<"svhsub", "csli", "aarch64_sve_shsub", "aarch64_sve_shsub_u">;
+defm SVHSUB_U : SInstZPZZ<"svhsub", "UcUsUiUl", "aarch64_sve_uhsub", "aarch64_sve_uhsub_u">;
+defm SVHSUBR_S : SInstZPZZ<"svhsubr", "csli", "aarch64_sve_shsubr", "aarch64_sve_shsub_u", [ReverseMergeAnyBinOp]>;
+defm SVHSUBR_U : SInstZPZZ<"svhsubr", "UcUsUiUl", "aarch64_sve_uhsubr", "aarch64_sve_uhsub_u", [ReverseMergeAnyBinOp]>;
defm SVQABS : SInstZPZ<"svqabs", "csil", "aarch64_sve_sqabs">;
defm SVQNEG : SInstZPZ<"svqneg", "csil", "aarch64_sve_sqneg">;
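Here the fourth SInstZPZZ parameter names the intrinsic Clang emits for the
don't-care (_x) builtins, so svhsub_x now selects aarch64_sve_shsub_u and
aarch64_sve_uhsub_u directly. Tagging the reversed svhsubr entries with
ReverseMergeAnyBinOp additionally lets them reuse the same _u intrinsic with
the vector operands swapped, matching how svqsubr is handled just above, so no
separate shsubr_u/uhsubr_u intrinsics are needed. For example, per the updated
hsubr.c tests below:

    ; svhsubr_s8_x(pg, op1, op2) lowers to the non-reversed _u form with
    ; op1 and op2 exchanged:
    %r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(
             <vscale x 16 x i1> %pg, <vscale x 16 x i8> %op2, <vscale x 16 x i8> %op1)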
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c
index 22c11466a7288..e101dd2fe3399 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c
@@ -297,12 +297,12 @@ svuint64_t test_svhsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
// CHECK-LABEL: @test_svhsub_s8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svhsub_s8_xu10__SVBool_tu10__SVInt8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svhsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -313,13 +313,13 @@ svint8_t test_svhsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svhsub_s16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_s16_xu10__SVBool_tu11__SVInt16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svhsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -330,13 +330,13 @@ svint16_t test_svhsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svhsub_s32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_s32_xu10__SVBool_tu11__SVInt32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svhsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
@@ -347,13 +347,13 @@ svint32_t test_svhsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svhsub_s64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_s64_xu10__SVBool_tu11__SVInt64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svhsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
@@ -363,12 +363,12 @@ svint64_t test_svhsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svhsub_u8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svhsub_u8_xu10__SVBool_tu11__SVUint8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svhsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
@@ -379,13 +379,13 @@ svuint8_t test_svhsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
// CHECK-LABEL: @test_svhsub_u16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_u16_xu10__SVBool_tu12__SVUint16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svhsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
@@ -396,13 +396,13 @@ svuint16_t test_svhsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
// CHECK-LABEL: @test_svhsub_u32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_u32_xu10__SVBool_tu12__SVUint32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svhsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
@@ -413,13 +413,13 @@ svuint32_t test_svhsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
// CHECK-LABEL: @test_svhsub_u64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsub_u64_xu10__SVBool_tu12__SVUint64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svhsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
@@ -775,14 +775,14 @@ svuint64_t test_svhsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsub_n_s8_xu10__SVBool_tu10__SVInt8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
@@ -795,7 +795,7 @@ svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_s16_xu10__SVBool_tu11__SVInt16_ts(
@@ -803,7 +803,7 @@ svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
@@ -816,7 +816,7 @@ svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_s32_xu10__SVBool_tu11__SVInt32_ti(
@@ -824,7 +824,7 @@ svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
@@ -837,7 +837,7 @@ svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_s64_xu10__SVBool_tu11__SVInt64_tl(
@@ -845,7 +845,7 @@ svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svhsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
@@ -857,14 +857,14 @@ svint64_t test_svhsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsub_n_u8_xu10__SVBool_tu11__SVUint8_th(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
@@ -877,7 +877,7 @@ svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_u16_xu10__SVBool_tu12__SVUint16_tt(
@@ -885,7 +885,7 @@ svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
@@ -898,7 +898,7 @@ svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_u32_xu10__SVBool_tu12__SVUint32_tj(
@@ -906,7 +906,7 @@ svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
@@ -919,7 +919,7 @@ svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsub_n_u64_xu10__SVBool_tu12__SVUint64_tm(
@@ -927,7 +927,7 @@ svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svhsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c
index 236ee3b5320e1..0f4fd0d84aff4 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c
@@ -297,12 +297,12 @@ svuint64_t test_svhsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
// CHECK-LABEL: @test_svhsubr_s8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsubr_s8_xu10__SVBool_tu10__SVInt8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svhsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -313,13 +313,13 @@ svint8_t test_svhsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svhsubr_s16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_s16_xu10__SVBool_tu11__SVInt16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svhsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -330,13 +330,13 @@ svint16_t test_svhsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svhsubr_s32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_s32_xu10__SVBool_tu11__SVInt32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svhsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
@@ -347,13 +347,13 @@ svint32_t test_svhsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svhsubr_s64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_s64_xu10__SVBool_tu11__SVInt64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svhsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
@@ -363,12 +363,12 @@ svint64_t test_svhsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svhsubr_u8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svhsubr_u8_xu10__SVBool_tu11__SVUint8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svhsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
@@ -379,13 +379,13 @@ svuint8_t test_svhsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
// CHECK-LABEL: @test_svhsubr_u16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_u16_xu10__SVBool_tu12__SVUint16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svhsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
@@ -396,13 +396,13 @@ svuint16_t test_svhsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
// CHECK-LABEL: @test_svhsubr_u32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_u32_xu10__SVBool_tu12__SVUint32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svhsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
@@ -413,13 +413,13 @@ svuint32_t test_svhsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
// CHECK-LABEL: @test_svhsubr_u64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svhsubr_u64_xu10__SVBool_tu12__SVUint64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svhsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
@@ -775,14 +775,14 @@ svuint64_t test_svhsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsubr_n_s8_xu10__SVBool_tu10__SVInt8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
@@ -795,7 +795,7 @@ svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s16_xu10__SVBool_tu11__SVInt16_ts(
@@ -803,7 +803,7 @@ svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
@@ -816,7 +816,7 @@ svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s32_xu10__SVBool_tu11__SVInt32_ti(
@@ -824,7 +824,7 @@ svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
@@ -837,7 +837,7 @@ svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s64_xu10__SVBool_tu11__SVInt64_tl(
@@ -845,7 +845,7 @@ svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svhsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
@@ -857,14 +857,14 @@ svint64_t test_svhsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svhsubr_n_u8_xu10__SVBool_tu11__SVUint8_th(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
@@ -877,7 +877,7 @@ svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u16_xu10__SVBool_tu12__SVUint16_tt(
@@ -885,7 +885,7 @@ svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
@@ -898,7 +898,7 @@ svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u32_xu10__SVBool_tu12__SVUint32_tj(
@@ -906,7 +906,7 @@ svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
@@ -919,7 +919,7 @@ svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u64_xu10__SVBool_tu12__SVUint64_tm(
@@ -927,7 +927,7 @@ svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svhsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index c905357d16fe4..7ec39257c0585 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2435,6 +2435,7 @@ def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intri
def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_shsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>;
@@ -2467,6 +2468,7 @@ def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpecul
def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_uhsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
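
The new definitions follow the established convention for "_u" SVE intrinsics: the predicate only selects the lanes whose results matter, and inactive lanes are undefined, which leaves instruction selection free to commute the operands or pick the reversed instruction. A minimal sketch of source code that lowers to one of the new intrinsics (illustrative name; assumes this patch and -march=armv8-a+sve2):

    #include <arm_sve.h>

    // The _x form places no requirement on inactive lanes, so with this
    // patch Clang emits @llvm.aarch64.sve.shsub.u rather than the
    // merging @llvm.aarch64.sve.shsub intrinsic.
    svint32_t hsub_x_example(svbool_t pg, svint32_t a, svint32_t b) {
      return svhsub_s32_x(pg, a, b);
    }
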
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 78b3178a96604..33c71bf976672 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3838,12 +3838,15 @@ let Predicates = [HasSVE2_or_SME] in {
// SVE2 integer halving add/subtract (predicated)
defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", AArch64shadd>;
defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", AArch64uhadd>;
- defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>;
- defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>;
+ defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub, "SHSUB_ZPZZ", DestructiveBinaryCommWithRev, "SHSUBR_ZPmZ">;
+ defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub, "UHSUB_ZPZZ", DestructiveBinaryCommWithRev, "UHSUBR_ZPmZ">;
defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", AArch64srhadd>;
defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", AArch64urhadd>;
- defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>;
- defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>;
+ defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr, "SHSUBR_ZPZZ", DestructiveBinaryCommWithRev, "SHSUB_ZPmZ", /*isReverseInstr*/ 1>;
+ defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr, "UHSUBR_ZPZZ", DestructiveBinaryCommWithRev, "UHSUB_ZPmZ", /*isReverseInstr*/ 1>;
+
+ defm SHSUB_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_shsub_u>;
+ defm UHSUB_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uhsub_u>;
// SVE2 integer pairwise add and accumulate long
defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>;
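
The ZPmZ records keep the merging semantics, while the new ZPZZ pseudos select the "u" intrinsics and are expanded after register allocation into SHSUB/UHSUB, their reversed counterparts, or a movprfx-prefixed instruction, depending on which operand ends up tied to the destination; the swapped-operand and movprfx llc tests later in this patch exercise these cases. A rough source-level illustration (hypothetical function name; assumes -march=armv8-a+sve2):

    #include <arm_sve.h>

    // Swapping the operands of the "u" form can be matched to shsubr
    // rather than needing an extra move, since inactive lanes carry no
    // guarantee; expansion may instead insert a movprfx when neither
    // source register can be tied to the destination.
    svint32_t hsub_swapped_example(svbool_t pg, svint32_t a, svint32_t b) {
      return svhsub_s32_x(pg, b, a);
    }
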
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA320.td b/llvm/lib/Target/AArch64/AArch64SchedA320.td
index 49f5c544c8f8a..a3f02303b2df6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA320.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA320.td
@@ -674,7 +674,7 @@ def : InstRW<[CortexA320Write<3, CortexA320UnitVALU>],
"^(ADD|SUB|SUBR)_ZI_[BHSD]",
"^ADR_[SU]XTW_ZZZ_D_[0123]",
"^ADR_LSL_ZZZ_[SD]_[0123]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]")>;
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]")>;
def : InstRW<[CortexA320Write<4, CortexA320UnitVALU>],
(instregex "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
index 19585f030a608..e2e2214dd28ba 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA510.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -652,7 +652,7 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
"^(ADD|SUB|SUBR)_ZI_[BHSD]",
"^ADR_[SU]XTW_ZZZ_D_[0123]",
"^ADR_LSL_ZZZ_[SD]_[0123]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]")>;
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]")>;
def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
(instregex "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index f275b0d2b39dc..028b6353af258 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -1662,7 +1662,7 @@ def : InstRW<[N2Write_2c_1V],
"^ADR_LSL_ZZZ_[SD]_[0123]",
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index 33ddae252c36c..b38ca140fe2c7 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -1752,7 +1752,7 @@ def : InstRW<[N3Write_2c_1V],
"^ADR_LSL_ZZZ_[SD]_[0123]",
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 373a5dc22e187..b718d14db43ba 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -2125,7 +2125,7 @@ def : InstRW<[V2Write_2c_1V],
"^ADR_LSL_ZZZ_[SD]_[0123]",
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
index 67c77dcc223b1..308b81cbfa40f 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
@@ -2054,7 +2054,7 @@ def : InstRW<[V3Write_2c_1V],
"^ADR_LSL_ZZZ_[SD]_[0123]",
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
index 20e733feb32f0..f64e87902a5f8 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
@@ -1982,7 +1982,7 @@ def : InstRW<[V3AEWrite_2c_1V],
"^ADR_LSL_ZZZ_[SD]_[0123]",
"^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
"^SADDLBT_ZZZ_[HSD]",
- "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]",
"^SSUBL(BT|TB)_ZZZ_[HSD]")>;
// Arithmetic, complex
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b8d408dc6623b..60d0cc3f98730 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1470,6 +1470,10 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_orr:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u)
.setMatchingIROpcode(Instruction::Or);
+ case Intrinsic::aarch64_sve_shsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_shsub_u);
+ case Intrinsic::aarch64_sve_shsubr:
+ return SVEIntrinsicInfo::defaultMergingOp();
case Intrinsic::aarch64_sve_sqrshl:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqrshl_u);
case Intrinsic::aarch64_sve_sqshl:
@@ -1478,6 +1482,10 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
case Intrinsic::aarch64_sve_srshl:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_srshl_u);
+ case Intrinsic::aarch64_sve_uhsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uhsub_u);
+ case Intrinsic::aarch64_sve_uhsubr:
+ return SVEIntrinsicInfo::defaultMergingOp();
case Intrinsic::aarch64_sve_uqrshl:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqrshl_u);
case Intrinsic::aarch64_sve_uqshl:
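
Note the asymmetry in the new cases: shsub/uhsub name a u-form replacement, while the reversed shsubr/uhsubr use the parameterless defaultMergingOp(), which supplies only the generic merging-op folds (for instance the no-active-lanes simplification tested below). No shsubr_u/uhsubr_u intrinsics are needed, since a reversed halving subtract with relaxed inactive lanes is just hsub_u with the operands swapped. A hedged sketch of a merging call that can now be relaxed (illustrative name; assumes -march=armv8-a+sve2):

    #include <arm_sve.h>

    // With an all-true predicate the merged-in lanes of 'a' are never
    // observed, so InstCombine may rewrite the merging intrinsic to the
    // new @llvm.aarch64.sve.shsub.u form.
    svint32_t hsub_all_active(svint32_t a, svint32_t b) {
      return svhsub_s32_m(svptrue_b32(), a, b);
    }
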
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
index a471625cd5ad8..5356bba94a30b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
@@ -1,6 +1,154 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+;
+; SHSUB
+;
+
+define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: shsub_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shsub_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shsub_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shsub_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SHSUB (swapped operands)
+;
+
+define <vscale x 16 x i8> @shsub_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: shsub_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shsub_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shsub_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shsub_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shsub_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @shsub_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shsub_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SHSUB (movprfx)
+;
+
+define <vscale x 16 x i8> @shsub_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: shsub_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: shsub z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shsub_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shsub_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: shsub z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shsub_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shsub_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: shsub z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @shsub_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shsub_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: shsub z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQRSHL
;
@@ -641,6 +789,154 @@ define <vscale x 2 x i64> @srshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2
ret <vscale x 2 x i64> %out
}
+;
+; UHSUB
+;
+
+define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhsub_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhsub_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhsub_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhsub_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UHSUB (swapped operands)
+;
+
+define <vscale x 16 x i8> @uhsub_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhsub_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhsub_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhsub_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhsub_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhsub_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhsub_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhsub_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uhsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UHSUB (movprfx)
+;
+
+define <vscale x 16 x i8> @uhsub_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhsub_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uhsub z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhsub_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhsub_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uhsub z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhsub_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhsub_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uhsub z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhsub_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhsub_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uhsub z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
;
; UQRSHL
;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
index b5420e9111746..f082bddbf9fec 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
@@ -741,8 +741,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_shsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsub_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -752,8 +751,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_shsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsubr_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1017,8 +1015,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_uhsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsub_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1028,8 +1025,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_uhsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsubr_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
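
The updated checks in this file follow from the defaultMergingOp information added above: with an all-false governing predicate every lane is inactive, so a merging operation simply returns its first data operand and the call folds away entirely, which is why each test now returns [[A]] directly. A source-level analogue (illustrative; assumes -march=armv8-a+sve2):

    #include <arm_sve.h>

    // All lanes inactive: the merging form returns its first operand
    // unchanged, so the whole halving subtract folds to 'a'.
    svint32_t hsub_no_active(svint32_t a, svint32_t b) {
      return svhsub_s32_m(svpfalse_b(), a, b);
    }
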
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
index 96ac0efde8764..097ed87279eda 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
@@ -324,6 +324,28 @@ define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32(<vscale x 4 x i32> %a, <vs
ret <vscale x 4 x i32> %r
}
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_shsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_shsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_shsubr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_shsubr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
@@ -431,6 +453,28 @@ define <vscale x 4 x i32> @replace_udiv_intrinsic_i32(<vscale x 4 x i32> %a, <vs
ret <vscale x 4 x i32> %r
}
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uhsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uhsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uhsubr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uhsubr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32