[llvm] f199836 - [LLVM][AArch64] Add "u" variants of SVE saturating/rounding shift left intrinsics. (#170503)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 03:34:48 PST 2025
Author: Paul Walker
Date: 2025-12-05T11:34:42Z
New Revision: f1998360c3b1613440103101bce2b24b71695a4a
URL: https://github.com/llvm/llvm-project/commit/f1998360c3b1613440103101bce2b24b71695a4a
DIFF: https://github.com/llvm/llvm-project/commit/f1998360c3b1613440103101bce2b24b71695a4a.diff
LOG: [LLVM][AArch64] Add "u" variants of SVE saturating/rounding shift left intrinsics. (#170503)
This follows the same path used for the regular shift intrinsics, whereby the
"don't care about the result of inactive lanes" property of the
associated _x builtins is preserved. In doing so we gain the ability
to switch between the reversed and movprfx variants depending on which
best fits register allocation.
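As an illustration (a minimal sketch; the exact test bodies live in the files
below), an _x builtin such as svqrshl_s8_x now lowers to the new "u" form of
the intrinsic rather than the merging form:

    #include <arm_sve.h>  // build with SVE2 enabled, e.g. -march=armv8-a+sve2

    // Hypothetical caller; mirrors test_svqrshl_s8_x in acle_sve2_qrshl.c below.
    svint8_t shift_x(svbool_t pg, svint8_t op1, svint8_t op2) {
      // Previously emitted @llvm.aarch64.sve.sqrshl.nxv16i8; with this patch
      // it lowers to @llvm.aarch64.sve.sqrshl.u.nxv16i8 (see CHECK lines below).
      return svqrshl_s8_x(pg, op1, op2);
    }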
Added:
Modified:
clang/include/clang/Basic/arm_sve.td
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c
clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 8dc40a665bd9a..780891152845b 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1293,25 +1293,25 @@ defm SVRSQRTE : SInstZPZ<"svrsqrte", "Ui", "aarch64_sve_ursqrte">;
//------------------------------------------------------------------------------
-multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string intrinsic, list<FlagType> flags=[]> {
- def _M : SInst<name # "[_{d}]", pat_v, types, MergeOp1, intrinsic, flags>;
- def _X : SInst<name # "[_{d}]", pat_v, types, MergeAny, intrinsic, flags>;
- def _Z : SInst<name # "[_{d}]", pat_v, types, MergeZero, intrinsic, flags>;
+multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> {
+ def _M : SInst<name # "[_{d}]", pat_v, types, MergeOp1, m_intrinsic, flags>;
+ def _X : SInst<name # "[_{d}]", pat_v, types, MergeAny, x_intrinsic, flags>;
+ def _Z : SInst<name # "[_{d}]", pat_v, types, MergeZero, m_intrinsic, flags>;
- def _N_M : SInst<name # "[_n_{d}]", pat_n, types, MergeOp1, intrinsic, flags>;
- def _N_X : SInst<name # "[_n_{d}]", pat_n, types, MergeAny, intrinsic, flags>;
- def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, intrinsic, flags>;
+ def _N_M : SInst<name # "[_n_{d}]", pat_n, types, MergeOp1, m_intrinsic, flags>;
+ def _N_X : SInst<name # "[_n_{d}]", pat_n, types, MergeAny, x_intrinsic, flags>;
+ def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, m_intrinsic, flags>;
}
let SVETargetGuard = "sve2|sme" in {
-defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", [VerifyRuntimeMode]>;
-defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [VerifyRuntimeMode]>;
-defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", [VerifyRuntimeMode]>;
-defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", [VerifyRuntimeMode]>;
-defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", [VerifyRuntimeMode]>;
-defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", [VerifyRuntimeMode]>;
-defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", [VerifyRuntimeMode]>;
-defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", [VerifyRuntimeMode]>;
+defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", "aarch64_sve_sqrshl_u", [VerifyRuntimeMode]>;
+defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", "aarch64_sve_uqrshl_u", [VerifyRuntimeMode]>;
+defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", "aarch64_sve_sqshl_u", [VerifyRuntimeMode]>;
+defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", "aarch64_sve_uqshl_u", [VerifyRuntimeMode]>;
+defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", "aarch64_sve_srshl_u", [VerifyRuntimeMode]>;
+defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", "aarch64_sve_urshl_u", [VerifyRuntimeMode]>;
+defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", "aarch64_sve_usqadd", [VerifyRuntimeMode]>;
+defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", "aarch64_sve_suqadd", [VerifyRuntimeMode]>;
def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>;
def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c
index cb1793d98418a..5b47497286847 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qrshl.c
@@ -297,12 +297,12 @@ svuint64_t test_svqrshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svqrshl_s8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svqrshl_s8_xu10__SVBool_tu10__SVInt8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svqrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -313,13 +313,13 @@ svint8_t test_svqrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svqrshl_s16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_s16_xu10__SVBool_tu11__SVInt16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svqrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -330,13 +330,13 @@ svint16_t test_svqrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svqrshl_s32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_s32_xu10__SVBool_tu11__SVInt32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svqrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
@@ -347,13 +347,13 @@ svint32_t test_svqrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svqrshl_s64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_s64_xu10__SVBool_tu11__SVInt64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svqrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
@@ -363,12 +363,12 @@ svint64_t test_svqrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svqrshl_u8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_svqrshl_u8_xu10__SVBool_tu11__SVUint8_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svqrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
@@ -379,13 +379,13 @@ svuint8_t test_svqrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svqrshl_u16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_u16_xu10__SVBool_tu12__SVUint16_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svqrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
@@ -396,13 +396,13 @@ svuint16_t test_svqrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svqrshl_u32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_u32_xu10__SVBool_tu12__SVUint32_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svqrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
@@ -413,13 +413,13 @@ svuint32_t test_svqrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svqrshl_u64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z18test_svqrshl_u64_xu10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svqrshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2)
@@ -775,14 +775,14 @@ svuint64_t test_svqrshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svqrshl_n_s8_xu10__SVBool_tu10__SVInt8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svqrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
@@ -795,7 +795,7 @@ svint8_t test_svqrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_s16_xu10__SVBool_tu11__SVInt16_ts(
@@ -803,7 +803,7 @@ svint8_t test_svqrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svqrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
@@ -816,7 +816,7 @@ svint16_t test_svqrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_s32_xu10__SVBool_tu11__SVInt32_ti(
@@ -824,7 +824,7 @@ svint16_t test_svqrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svqrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
@@ -837,7 +837,7 @@ svint32_t test_svqrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_s64_xu10__SVBool_tu11__SVInt64_tl(
@@ -845,7 +845,7 @@ svint32_t test_svqrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svqrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
@@ -857,14 +857,14 @@ svint64_t test_svqrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z19test_svqrshl_n_u8_xu10__SVBool_tu11__SVUint8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svqrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
@@ -877,7 +877,7 @@ svuint8_t test_svqrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_u16_xu10__SVBool_tu12__SVUint16_ts(
@@ -885,7 +885,7 @@ svuint8_t test_svqrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svqrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
@@ -898,7 +898,7 @@ svuint16_t test_svqrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_u32_xu10__SVBool_tu12__SVUint32_ti(
@@ -906,7 +906,7 @@ svuint16_t test_svqrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svqrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
@@ -919,7 +919,7 @@ svuint32_t test_svqrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z20test_svqrshl_n_u64_xu10__SVBool_tu12__SVUint64_tl(
@@ -927,7 +927,7 @@ svuint32_t test_svqrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svqrshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2)
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c
index 4faef0234f037..8dc83b5247924 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_qshl.c
@@ -297,12 +297,12 @@ svuint64_t test_svqshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svqshl_s8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svqshl_s8_xu10__SVBool_tu10__SVInt8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svqshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -313,13 +313,13 @@ svint8_t test_svqshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svqshl_s16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_s16_xu10__SVBool_tu11__SVInt16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svqshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -330,13 +330,13 @@ svint16_t test_svqshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svqshl_s32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_s32_xu10__SVBool_tu11__SVInt32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svqshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
@@ -347,13 +347,13 @@ svint32_t test_svqshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svqshl_s64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_s64_xu10__SVBool_tu11__SVInt64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svqshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
@@ -363,12 +363,12 @@ svint64_t test_svqshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svqshl_u8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svqshl_u8_xu10__SVBool_tu11__SVUint8_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svqshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
@@ -379,13 +379,13 @@ svuint8_t test_svqshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svqshl_u16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_u16_xu10__SVBool_tu12__SVUint16_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svqshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
@@ -396,13 +396,13 @@ svuint16_t test_svqshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svqshl_u32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_u32_xu10__SVBool_tu12__SVUint32_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svqshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
@@ -413,13 +413,13 @@ svuint32_t test_svqshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svqshl_u64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svqshl_u64_xu10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svqshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2)
@@ -775,14 +775,14 @@ svuint64_t test_svqshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svqshl_n_s8_xu10__SVBool_tu10__SVInt8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svqshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
@@ -795,7 +795,7 @@ svint8_t test_svqshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_s16_xu10__SVBool_tu11__SVInt16_ts(
@@ -803,7 +803,7 @@ svint8_t test_svqshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svqshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
@@ -816,7 +816,7 @@ svint16_t test_svqshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_s32_xu10__SVBool_tu11__SVInt32_ti(
@@ -824,7 +824,7 @@ svint16_t test_svqshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svqshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
@@ -837,7 +837,7 @@ svint32_t test_svqshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_s64_xu10__SVBool_tu11__SVInt64_tl(
@@ -845,7 +845,7 @@ svint32_t test_svqshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svqshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
@@ -857,14 +857,14 @@ svint64_t test_svqshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svqshl_n_u8_xu10__SVBool_tu11__SVUint8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svqshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
@@ -877,7 +877,7 @@ svuint8_t test_svqshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_u16_xu10__SVBool_tu12__SVUint16_ts(
@@ -885,7 +885,7 @@ svuint8_t test_svqshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svqshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
@@ -898,7 +898,7 @@ svuint16_t test_svqshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_u32_xu10__SVBool_tu12__SVUint32_ti(
@@ -906,7 +906,7 @@ svuint16_t test_svqshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svqshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
@@ -919,7 +919,7 @@ svuint32_t test_svqshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svqshl_n_u64_xu10__SVBool_tu12__SVUint64_tl(
@@ -927,7 +927,7 @@ svuint32_t test_svqshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svqshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2)
diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c
index 9891b80460aff..dc03a20850672 100644
--- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c
+++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_rshl.c
@@ -297,12 +297,12 @@ svuint64_t test_svrshl_u64_m(svbool_t pg, svuint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svrshl_s8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svrshl_s8_xu10__SVBool_tu10__SVInt8_tS0_(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
@@ -313,13 +313,13 @@ svint8_t test_svrshl_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svrshl_s16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_s16_xu10__SVBool_tu11__SVInt16_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
@@ -330,13 +330,13 @@ svint16_t test_svrshl_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svrshl_s32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_s32_xu10__SVBool_tu11__SVInt32_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
@@ -347,13 +347,13 @@ svint32_t test_svrshl_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svrshl_s64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_s64_xu10__SVBool_tu11__SVInt64_tS0_(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
@@ -363,12 +363,12 @@ svint64_t test_svrshl_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
// CHECK-LABEL: @test_svrshl_u8_x(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z16test_svrshl_u8_xu10__SVBool_tu11__SVUint8_tu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
@@ -379,13 +379,13 @@ svuint8_t test_svrshl_u8_x(svbool_t pg, svuint8_t op1, svint8_t op2)
// CHECK-LABEL: @test_svrshl_u16_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_u16_xu10__SVBool_tu12__SVUint16_tu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
@@ -396,13 +396,13 @@ svuint16_t test_svrshl_u16_x(svbool_t pg, svuint16_t op1, svint16_t op2)
// CHECK-LABEL: @test_svrshl_u32_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_u32_xu10__SVBool_tu12__SVUint32_tu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
@@ -413,13 +413,13 @@ svuint32_t test_svrshl_u32_x(svbool_t pg, svuint32_t op1, svint32_t op2)
// CHECK-LABEL: @test_svrshl_u64_x(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z17test_svrshl_u64_xu10__SVBool_tu12__SVUint64_tu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svrshl_u64_x(svbool_t pg, svuint64_t op1, svint64_t op2)
@@ -775,14 +775,14 @@ svuint64_t test_svrshl_n_u64_m(svbool_t pg, svuint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svrshl_n_s8_xu10__SVBool_tu10__SVInt8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svint8_t test_svrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
@@ -795,7 +795,7 @@ svint8_t test_svrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_s16_xu10__SVBool_tu11__SVInt16_ts(
@@ -803,7 +803,7 @@ svint8_t test_svrshl_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svint16_t test_svrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
@@ -816,7 +816,7 @@ svint16_t test_svrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_s32_xu10__SVBool_tu11__SVInt32_ti(
@@ -824,7 +824,7 @@ svint16_t test_svrshl_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svint32_t test_svrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
@@ -837,7 +837,7 @@ svint32_t test_svrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_s64_xu10__SVBool_tu11__SVInt64_tl(
@@ -845,7 +845,7 @@ svint32_t test_svrshl_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svint64_t test_svrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
@@ -857,14 +857,14 @@ svint64_t test_svrshl_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_svrshl_n_u8_xu10__SVBool_tu11__SVUint8_ta(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svuint8_t test_svrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
@@ -877,7 +877,7 @@ svuint8_t test_svrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_u16_xu10__SVBool_tu12__SVUint16_ts(
@@ -885,7 +885,7 @@ svuint8_t test_svrshl_n_u8_x(svbool_t pg, svuint8_t op1, int8_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
svuint16_t test_svrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
@@ -898,7 +898,7 @@ svuint16_t test_svrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_u32_xu10__SVBool_tu12__SVUint32_ti(
@@ -906,7 +906,7 @@ svuint16_t test_svrshl_n_u16_x(svbool_t pg, svuint16_t op1, int16_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
svuint32_t test_svrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
@@ -919,7 +919,7 @@ svuint32_t test_svrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z19test_svrshl_n_u64_xu10__SVBool_tu12__SVUint64_tl(
@@ -927,7 +927,7 @@ svuint32_t test_svrshl_n_u32_x(svbool_t pg, svuint32_t op1, int32_t op2)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0
// CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
svuint64_t test_svrshl_n_u64_x(svbool_t pg, svuint64_t op1, int64_t op2)
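Taken together, the updated CHECK lines above show the user-visible effect on the clang side: the _x builtins now emit the ".u" intrinsics, while the _m and _z builtins continue to use the merging forms. A minimal hand-written IR equivalent of one such call (the function name is chosen for illustration, not taken from the tests):

declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @rshl_x_equivalent(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2) {
  ; svrshl_s8_x(pg, op1, op2) now lowers to the "u" intrinsic, whose
  ; inactive lanes are undefined rather than merged from op1.
  %r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %op1, <vscale x 16 x i8> %op2)
  ret <vscale x 16 x i8> %r
}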
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1c86c6815f049..272e800dfc94c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2449,7 +2449,9 @@ def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic<[IntrSpe
def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_sqrshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_sqshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
@@ -2457,6 +2459,7 @@ def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpecul
def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_srshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_srsra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
@@ -2467,13 +2470,16 @@ def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpecul
def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_uqrshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_uqshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
+def int_aarch64_sve_urshl_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>;
def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>;
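Each new "_u" intrinsic shares the AdvSIMD_Pred2VectorArg_Intrinsic signature of its merging counterpart: a governing predicate plus two data vectors of matching element type. A minimal sketch of a declaration and call, assuming the i32 element case and an illustrative function name:

declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

define <vscale x 4 x i32> @urshl_u_example(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  ; Active lanes: unsigned rounding shift of %a by %b; inactive lanes: undefined.
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %r
}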
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7b51987d3c603..bbcffb0c44e85 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3886,12 +3886,12 @@ let Predicates = [HasSVE2_or_SME] in {
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
- defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_srshl>;
- defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_urshl>;
- defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqshl>;
- defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
- defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
- defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
+ defm SRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_srshl_u>;
+ defm URSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_urshl_u>;
+ defm SQSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_sqshl_u>;
+ defm UQSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uqshl_u>;
+ defm SQRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_sqrshl_u>;
+ defm UQRSHL_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uqrshl_u>;
} // End HasSVE2_or_SME
let Predicates = [HasSVE2_or_SME, UseExperimentalZeroingPseudos] in {
@@ -3910,6 +3910,9 @@ let Predicates = [HasSVE2_or_SME] in {
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right< 0b1101, "urshr", "URSHR_ZPZI", AArch64urshri_p>;
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
+ defm SQSHL_ZPZI : sve_int_shift_pred_bhsd<int_aarch64_sve_sqshl_u, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
+ defm UQSHL_ZPZI : sve_int_shift_pred_bhsd<int_aarch64_sve_uqshl_u, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
+
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;
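The new SQSHL_ZPZI/UQSHL_ZPZI entries additionally let a "u"-form saturating shift whose amount is an in-range splat select the immediate-form instruction, as the sqshl_n_* tests below verify. A hedged sketch of the expected selection (function name and shift amount are illustrative):

declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)

define <vscale x 8 x i16> @uqshl_u_splat(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
  ; Expected to select:  uqshl z0.h, p0/m, z0.h, #3
  %r = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 3))
  ret <vscale x 8 x i16> %r
}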
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0b19471eedb78..b2a9f9cb75910 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1446,10 +1446,22 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
case Intrinsic::aarch64_sve_orr:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u)
.setMatchingIROpcode(Instruction::Or);
+ case Intrinsic::aarch64_sve_sqrshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqrshl_u);
+ case Intrinsic::aarch64_sve_sqshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqshl_u);
case Intrinsic::aarch64_sve_sqsub:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
+ case Intrinsic::aarch64_sve_srshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_srshl_u);
+ case Intrinsic::aarch64_sve_uqrshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqrshl_u);
+ case Intrinsic::aarch64_sve_uqshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqshl_u);
case Intrinsic::aarch64_sve_uqsub:
return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
+ case Intrinsic::aarch64_sve_urshl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_urshl_u);
case Intrinsic::aarch64_sve_add_u:
return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
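Routing these intrinsics through SVEIntrinsicInfo::defaultMergingOp is what allows InstCombine to drop the merging semantics when the governing predicate is all active, as exercised by sve-intrinsic-simplify-to-u-form.ll. A minimal before/after sketch under that assumption (function name is illustrative):

declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

; Before: merging intrinsic guarded by an all-active predicate.
define <vscale x 4 x i32> @srshl_all_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %r
}
; After instcombine the call is expected to use
; @llvm.aarch64.sve.srshl.u.nxv4i32 instead, freeing the backend to
; choose between the reversed (srshlr) and movprfx'd forms.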
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
index 13e1eae8caec8..a471625cd5ad8 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll
@@ -1,54 +1,1090 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+;
+; SQRSHL
+;
+
+define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQRSHL (swapped operands)
+;
+
+define <vscale x 16 x i8> @sqrshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQRSHL (movprfx)
+;
+
+define <vscale x 16 x i8> @sqrshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrshl_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqrshl z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqrshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqrshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqrshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHL (Vectors)
+;
+
+define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHL (Vectors, swapped operands)
+;
+
+define <vscale x 16 x i8> @sqshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHL (Vectors, movprfx)
+;
+
+define <vscale x 16 x i8> @sqshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqshl_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHL (Immediate)
+;
+
+define <vscale x 16 x i8> @sqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat (i8 7))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat (i16 15))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat (i32 31))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat (i64 63))
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @sqshl_n_i8_out_of_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.b, #8 // =0x8
+; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat (i8 8))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16_out_of_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, #16 // =0x10
+; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat (i16 16))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, #32 // =0x20
+; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat (i32 32))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, #64 // =0x40
+; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat (i64 64))
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSHL (Immediate, movprfx)
+;
+
+define <vscale x 16 x i8> @sqshl_n_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshl_n_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat (i8 7))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshl_n_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshl_n_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat (i16 15))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshl_n_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshl_n_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat (i32 31))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshl_n_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshl_n_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat (i64 63))
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQSUB
;
-define <vscale x 16 x i8> @sqsub_i8_u(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sqsub_i8_u:
+define <vscale x 16 x i8> @sqsub_i8_u(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqsub_i8_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqsub_i16_u(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqsub_i16_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqsub_i32_u(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqsub_i32_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqsub_i64_u(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqsub_i64_u:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSHL
+;
+
+define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srshl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srshl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSHL (swapped operands)
+;
+
+define <vscale x 16 x i8> @srshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: srshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSHL (movprfx)
+;
+
+define <vscale x 16 x i8> @srshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srshl_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: srshl z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: srshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: srshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: srshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQRSHL
+;
+
+define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqrshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQRSHL (swapped operands)
+;
+
+define <vscale x 16 x i8> @uqrshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqrshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqrshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQRSHL (movprfx)
+;
+
+define <vscale x 16 x i8> @uqrshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqrshl_i8_movprfx:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqrshl z0.b, p0/m, z0.b, z2.b
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqrshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqrshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqrshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqrshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSHL (Vectors)
+;
+
+define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @sqsub_i16_u(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sqsub_i16_u:
+define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshl_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @sqsub_i32_u(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sqsub_i32_u:
+define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshl_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @sqsub_i64_u(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqsub_i64_u:
+define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshl_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSHL (Vectors, swapped operands)
+;
+
+define <vscale x 16 x i8> @uqshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSHL (Vectors, movprfx)
+;
+
+define <vscale x 16 x i8> @uqshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqshl_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
+;
+; UQSHL (Immediate)
+;
+
+define <vscale x 16 x i8> @uqshl_n_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat(i8 7))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat(i16 15))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat(i32 31))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat(i64 63))
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @uqshl_n_i8_out_of_range(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.b, #8 // =0x8
+; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat(i8 8))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16_out_of_range(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, #16 // =0x10
+; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat(i16 16))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, #32 // =0x20
+; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat(i32 32))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64_out_of_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, #64 // =0x40
+; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat(i64 64))
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSHL (Immediate, movprfx)
+;
+
+define <vscale x 16 x i8> @uqshl_n_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uqshl_n_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> splat(i8 7))
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_n_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshl_n_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> splat(i16 15))
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_n_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshl_n_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> splat(i32 31))
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_n_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshl_n_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: uqshl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> splat(i64 63))
+ ret <vscale x 2 x i64> %out
+}
+
;
; UQSUB
;
@@ -97,12 +1133,150 @@ define <vscale x 2 x i64> @uqsub_i64_u(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+;
+; URSHL
+;
+
+define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urshl_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urshl_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urshl_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshl_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; URSHL (swapped operands)
+;
+
+define <vscale x 16 x i8> @urshl_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urshl_i8_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshl_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urshl_i16_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshl_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urshl_i32_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshl_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshl_i64_swapped_operands:
+; CHECK: // %bb.0:
+; CHECK-NEXT: urshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; URSHL (movprfx)
+;
+
+define <vscale x 16 x i8> @urshl_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urshl_i8_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urshl z0.b, p0/m, z0.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.u.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshl_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urshl_i16_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urshl z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.u.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshl_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urshl_i32_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urshl z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @urshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshl_i64_movprfx:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urshl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.u.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
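For context when reading the CHECK lines above: these ".u" calls are what Clang now emits for the ACLE _x builtins, whose inactive lanes are unspecified. A minimal C sketch, assuming arm_sve.h and an SVE2-enabled invocation such as -march=armv8-a+sve2 (the function name is illustrative, not from the patch):

#include <arm_sve.h>

// The _x ("don't care") builtin leaves inactive lanes unspecified, so it
// now lowers to llvm.aarch64.sve.srshl.u, and the backend may select
// srshl, srshlr, or movprfx+srshl, whichever best fits register
// allocation -- exactly the three selections the plain, swapped-operands,
// and movprfx tests above pin down.
svint32_t rounding_shift_x(svbool_t pg, svint32_t a, svint32_t b) {
  return svrshl_x(pg, a, b);
}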
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
index 1b6873e84b09e..d76b8c7f8dc2a 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -771,74 +771,6 @@ define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
-;
-; SQRSHLR
-;
-
-define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sqrshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sqrshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sqrshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sqrshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sqrshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqrshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sqrshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqrshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqrshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; SQSHL (Vectors)
;
@@ -887,74 +819,6 @@ define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
-;
-; SQSHLR
-;
-
-define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sqshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sqshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sqshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sqshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: sqshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: sqshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; SQSHL (Scalar)
;
@@ -1346,75 +1210,6 @@ define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
-
-;
-; SRSHLR
-;
-
-define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: srshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: srshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: srshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: srshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: srshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: srshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: srshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: srshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; SRSHR
;
@@ -1895,74 +1690,6 @@ define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
-;
-; UQRSHLR
-;
-
-define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: uqrshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: uqrshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: uqrshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: uqrshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uqrshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uqrshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uqrshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uqrshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqrshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; UQSHL (Vectors)
;
@@ -2011,74 +1738,6 @@ define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
-;
-; UQSHLR
-;
-
-define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: uqshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: uqshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: uqshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: uqshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: uqshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uqshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uqshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uqshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; UQSHL (Scalar)
;
@@ -2390,74 +2049,6 @@ define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
-;
-; URSHLR
-;
-
-define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: urshlr_i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
- %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
- <vscale x 16 x i8> %b,
- <vscale x 16 x i8> %a)
- ret <vscale x 16 x i8> %out
-}
-
-define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: urshlr_i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: urshlr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
- <vscale x 8 x i16> %b,
- <vscale x 8 x i16> %a)
- ret <vscale x 8 x i16> %out
-}
-
-define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: urshlr_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: urshlr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b,
- <vscale x 4 x i32> %a)
- ret <vscale x 4 x i32> %out
-}
-
-define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: urshlr_i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: urshlr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
-define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: urshlr_i64_noptrue:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urshl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %b,
- <vscale x 2 x i64> %a)
- ret <vscale x 2 x i64> %out
-}
-
;
; URSHR
;
@@ -2884,8 +2475,3 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <v
declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
-
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
-declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
-declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
index ddcaeaf44592e..b5420e9111746 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
@@ -848,8 +848,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_sqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqrshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -859,8 +858,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_sqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -902,8 +900,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_srshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_srshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1105,8 +1102,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <
define <vscale x 4 x i32> @simplify_uqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqrshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1116,8 +1112,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_uqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -1170,8 +1165,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <v
define <vscale x 4 x i32> @simplify_urshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @simplify_urshl_intrinsic
; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
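The no-active-lanes hunks above encode the fact that a merging operation with an all-false predicate leaves its first data operand untouched, so the whole call folds to that operand. A hedged C-level view of the same fold, under the same arm_sve.h assumptions as before:

#include <arm_sve.h>

// With an all-false predicate the _m (merging) builtin keeps every lane
// of 'a', so the call simplifies to 'a' itself, matching the new
// "ret <vscale x 4 x i32> [[A]]" CHECK lines.
svint32_t no_active_lanes(svint32_t a, svint32_t b) {
  return svqrshl_m(svpfalse_b(), a, b);  // equivalent to returning a
}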
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
index 8072b3f8f5394..96ac0efde8764 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
@@ -357,6 +357,26 @@ define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_sqrshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqrshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @replace_sqshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
@@ -368,6 +388,16 @@ define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_srshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_srshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
@@ -434,6 +464,26 @@ define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_uqrshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqrshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @replace_uqshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
@@ -445,4 +495,14 @@ define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <v
ret <vscale x 4 x i32> %r
}
+define <vscale x 4 x i32> @replace_urshl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_urshl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[R]]
+;
+ %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
attributes #0 = { "target-features"="+sve,+sve2" }
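The simplify-to-u-form tests above check the complementary transform: with an all-true predicate no inactive lane is observable, so InstCombine may retarget the plain intrinsic to its new ".u" counterpart, handing isel the reversed/movprfx freedom exercised earlier in the patch. A sketch of how such a call arises in C terms, again assuming the standard arm_sve.h names:

#include <arm_sve.h>

// An all-true predicate reaches InstCombine as sqrshl(ptrue, a, b); since
// every lane is active, the call is rewritten to sqrshl.u, as the CHECK
// lines above verify.
svint32_t all_lanes_active(svint32_t a, svint32_t b) {
  return svqrshl_m(svptrue_b32(), a, b);
}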