[llvm] c5ed93f - [SVE ACLE] Remove DAG combines that are no longer relevant.

Jolanta Jensen via llvm-commits <llvm-commits at lists.llvm.org>
Fri Jun 23 02:09:43 PDT 2023


Author: Jolanta Jensen
Date: 2023-06-23T09:09:07Z
New Revision: c5ed93f975830b4ed52f1899bfc9d8c89bf81c38

URL: https://github.com/llvm/llvm-project/commit/c5ed93f975830b4ed52f1899bfc9d8c89bf81c38
DIFF: https://github.com/llvm/llvm-project/commit/c5ed93f975830b4ed52f1899bfc9d8c89bf81c38.diff

LOG: [SVE ACLE] Remove DAG combines that are no longer relevant.

This patch removes DAG combines that are no longer relevant
because equivalent IR combines have been added.

Differential Revision: https://reviews.llvm.org/D153445
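
As an illustrative sketch (not part of this patch): the IR combines referred
to above are assumed to canonicalize a merged SVE intrinsic whose governing
predicate is an all-active ptrue into its "_u" (undef-merging) counterpart,
which is why the tests below now call e.g. @llvm.aarch64.sve.sabd.u.* instead
of @llvm.aarch64.sve.sabd.*. Roughly:

  ; Hypothetical input IR: a merged sabd whose predicate is an all-active ptrue.
  define <vscale x 16 x i8> @sabd_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
    %r = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
    ret <vscale x 16 x i8> %r
  }

  ; After the IR-level combine the call is rewritten to the "_u" form, so the
  ; DAG never sees the merged variant and the convertMergedOpToPredOp cases
  ; removed below are no longer reachable for it:
  ;   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)

  declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
  declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)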

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-aba.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
    llvm/test/CodeGen/AArch64/sve-saba.ll
    llvm/test/CodeGen/AArch64/sve2-int-mul.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
    llvm/test/CodeGen/AArch64/sve2-sra.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3488f262c15e8..fd226aa039fda 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18921,58 +18921,36 @@ static SDValue performIntrinsicCombine(SDNode *N,
                        N->getOperand(1));
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
-  case Intrinsic::aarch64_sve_mul:
-    return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
   case Intrinsic::aarch64_sve_mul_u:
     return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smulh:
-    return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
   case Intrinsic::aarch64_sve_smulh_u:
     return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umulh:
-    return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
   case Intrinsic::aarch64_sve_umulh_u:
     return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smin:
-    return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_smin_u:
     return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umin:
-    return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_umin_u:
     return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_smax:
-    return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_smax_u:
     return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_umax:
-    return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_umax_u:
     return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_lsl:
-    return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
   case Intrinsic::aarch64_sve_lsl_u:
     return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_lsr:
-    return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
   case Intrinsic::aarch64_sve_lsr_u:
     return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_asr:
-    return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
   case Intrinsic::aarch64_sve_asr_u:
     return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_fadd:
-    return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
   case Intrinsic::aarch64_sve_fadd_u:
     return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
@@ -18995,55 +18973,35 @@ static SDValue performIntrinsicCombine(SDNode *N,
   case Intrinsic::aarch64_sve_fminnm_u:
     return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_fmul:
-    return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
   case Intrinsic::aarch64_sve_fmul_u:
     return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_fsub:
-    return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
   case Intrinsic::aarch64_sve_fsub_u:
     return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_add:
-    return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
   case Intrinsic::aarch64_sve_add_u:
     return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
                        N->getOperand(3));
-  case Intrinsic::aarch64_sve_sub:
-    return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
   case Intrinsic::aarch64_sve_sub_u:
     return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
                        N->getOperand(3));
   case Intrinsic::aarch64_sve_subr:
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
-  case Intrinsic::aarch64_sve_and:
-    return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
   case Intrinsic::aarch64_sve_and_u:
     return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
                        N->getOperand(3));
-  case Intrinsic::aarch64_sve_bic:
-    return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
   case Intrinsic::aarch64_sve_bic_u:
     return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
                        N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_eor:
-    return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
   case Intrinsic::aarch64_sve_eor_u:
     return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
                        N->getOperand(3));
-  case Intrinsic::aarch64_sve_orr:
-    return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
   case Intrinsic::aarch64_sve_orr_u:
     return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
                        N->getOperand(3));
-  case Intrinsic::aarch64_sve_sabd:
-    return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
   case Intrinsic::aarch64_sve_sabd_u:
     return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
                        N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_uabd:
-    return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
   case Intrinsic::aarch64_sve_uabd_u:
     return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
                        N->getOperand(2), N->getOperand(3));
@@ -19055,15 +19013,11 @@ static SDValue performIntrinsicCombine(SDNode *N,
                        N->getOperand(1), N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_sqadd:
     return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
-  case Intrinsic::aarch64_sve_sqsub:
-    return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
   case Intrinsic::aarch64_sve_sqsub_u:
     return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
                        N->getOperand(2), N->getOperand(3));
   case Intrinsic::aarch64_sve_uqadd:
     return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
-  case Intrinsic::aarch64_sve_uqsub:
-    return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
   case Intrinsic::aarch64_sve_uqsub_u:
     return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
                        N->getOperand(2), N->getOperand(3));

diff --git a/llvm/test/CodeGen/AArch64/sve-aba.ll b/llvm/test/CodeGen/AArch64/sve-aba.ll
index b1298e097ab6c..6859f7d017044 100644
--- a/llvm/test/CodeGen/AArch64/sve-aba.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aba.ll
@@ -42,7 +42,7 @@ define <vscale x 16 x i8> @saba_b_from_sabd(<vscale x 16 x i8> %a, <vscale x 16
 ; CHECK-NEXT:    saba z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   %3 = add <vscale x 16 x i8> %2, %a
   ret <vscale x 16 x i8> %3
 }
@@ -93,7 +93,7 @@ define <vscale x 8 x i16> @saba_h_from_sabd(<vscale x 8 x i16> %a, <vscale x 8 x
 ; CHECK-NEXT:    saba z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   %3 = add <vscale x 8 x i16> %2, %a
   ret <vscale x 8 x i16> %3
 }
@@ -144,7 +144,7 @@ define <vscale x 4 x i32> @saba_s_from_sabd(<vscale x 4 x i32> %a, <vscale x 4 x
 ; CHECK-NEXT:    saba z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   %3 = add <vscale x 4 x i32> %2, %a
   ret <vscale x 4 x i32> %3
 }
@@ -195,7 +195,7 @@ define <vscale x 2 x i64> @saba_d_from_sabd(<vscale x 2 x i64> %a, <vscale x 2 x
 ; CHECK-NEXT:    saba z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   %3 = add <vscale x 2 x i64> %2, %a
   ret <vscale x 2 x i64> %3
 }
@@ -249,7 +249,7 @@ define <vscale x 16 x i8> @uaba_b_from_uabd(<vscale x 16 x i8> %a, <vscale x 16
 ; CHECK-NEXT:    uaba z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   %3 = add <vscale x 16 x i8> %2, %a
   ret <vscale x 16 x i8> %3
 }
@@ -299,7 +299,7 @@ define <vscale x 8 x i16> @uaba_h_from_uabd(<vscale x 8 x i16> %a, <vscale x 8 x
 ; CHECK-NEXT:    uaba z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   %3 = add <vscale x 8 x i16> %2, %a
   ret <vscale x 8 x i16> %3
 }
@@ -349,7 +349,7 @@ define <vscale x 4 x i32> @uaba_s_from_uabd(<vscale x 4 x i32> %a, <vscale x 4 x
 ; CHECK-NEXT:    uaba z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   %3 = add <vscale x 4 x i32> %2, %a
   ret <vscale x 4 x i32> %3
 }
@@ -399,7 +399,7 @@ define <vscale x 2 x i64> @uaba_d_from_uabd(<vscale x 2 x i64> %a, <vscale x 2 x
 ; CHECK-NEXT:    uaba z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   %3 = add <vscale x 2 x i64> %2, %a
   ret <vscale x 2 x i64> %3
 }
@@ -443,21 +443,11 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 58e9137dc4c5a..59d52b00c00f5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -11,9 +11,9 @@ define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -25,9 +25,9 @@ define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -40,9 +40,9 @@ define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -54,9 +54,9 @@ define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -70,9 +70,9 @@ define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -84,9 +84,9 @@ define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -100,9 +100,9 @@ define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -116,9 +116,9 @@ define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -130,9 +130,9 @@ define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -145,9 +145,9 @@ define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -159,9 +159,9 @@ define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -175,9 +175,9 @@ define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -189,9 +189,9 @@ define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -205,9 +205,9 @@ define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -220,9 +220,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -236,9 +236,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -426,9 +426,9 @@ define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                <vscale x 16 x i8> %a,
-                                                                <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -440,9 +440,9 @@ define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -457,9 +457,9 @@ define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -471,9 +471,9 @@ define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -487,9 +487,9 @@ define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -501,9 +501,9 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -517,9 +517,9 @@ define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -532,9 +532,9 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -548,9 +548,9 @@ define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -583,9 +583,9 @@ define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                <vscale x 16 x i8> %a,
-                                                                <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -597,9 +597,9 @@ define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -613,9 +613,9 @@ define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -627,9 +627,9 @@ define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -644,9 +644,9 @@ define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -659,9 +659,9 @@ define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -675,9 +675,9 @@ define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -690,9 +690,9 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -706,9 +706,9 @@ define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -741,9 +741,9 @@ define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                <vscale x 16 x i8> %a,
-                                                                <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -755,9 +755,9 @@ define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -771,9 +771,9 @@ define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -785,9 +785,9 @@ define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -802,9 +802,9 @@ define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -816,9 +816,9 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -832,9 +832,9 @@ define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -847,9 +847,9 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -863,9 +863,9 @@ define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -898,9 +898,9 @@ define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                <vscale x 16 x i8> %a,
-                                                                <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                  <vscale x 16 x i8> %a,
+                                                                  <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -912,9 +912,9 @@ define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -928,9 +928,9 @@ define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                <vscale x 8 x i16> %a,
-                                                                <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x i16> %a,
+                                                                  <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -942,9 +942,9 @@ define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -959,9 +959,9 @@ define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                <vscale x 4 x i32> %a,
-                                                                <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x i32> %a,
+                                                                  <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -973,9 +973,9 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -989,9 +989,9 @@ define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                <vscale x 2 x i64> %a,
-                                                                <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                  <vscale x 2 x i64> %a,
+                                                                  <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -1004,9 +1004,9 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1020,9 +1020,9 @@ define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                       <vscale x 4 x i32> %a,
+                                                                       <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1412,9 +1412,9 @@ define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -1452,9 +1452,9 @@ define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -1492,9 +1492,9 @@ define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1532,9 +1532,9 @@ define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -1574,9 +1574,9 @@ define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -1627,9 +1627,9 @@ define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -1680,9 +1680,9 @@ define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1733,9 +1733,9 @@ define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -1788,9 +1788,9 @@ define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -1828,9 +1828,9 @@ define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -1868,9 +1868,9 @@ define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1908,9 +1908,9 @@ define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -1936,9 +1936,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1952,9 +1952,9 @@ define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1989,9 +1989,9 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -2005,9 +2005,9 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -2025,20 +2025,22 @@ define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                     <vscale x 4 x i32> %a,
-                                                                     <vscale x 4 x i32> %b)
+                                                                    <vscale x 4 x i32> %a,
+                                                                    <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
@@ -2065,45 +2067,70 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>,
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
index 85eace3b1bb11..7cdedeee2cada 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
@@ -12,9 +12,9 @@ define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 7, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -25,9 +25,9 @@ define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 240, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -38,9 +38,9 @@ define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -51,9 +51,9 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -68,9 +68,9 @@ define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -81,9 +81,9 @@ define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -94,9 +94,9 @@ define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -107,9 +107,9 @@ define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -124,9 +124,9 @@ define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 15, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -137,9 +137,9 @@ define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 64519, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -150,9 +150,9 @@ define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -163,9 +163,9 @@ define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 281474976710656, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -180,9 +180,9 @@ define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 6, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -193,9 +193,9 @@ define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
   %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 32769, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -206,9 +206,9 @@ define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
   %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 65535, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -219,9 +219,9 @@ define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a) {
 ; CHECK-NEXT:    ret
   %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 9222246136947933184, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -234,9 +234,9 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a) {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -250,9 +250,9 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a) {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 65535)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -275,25 +275,27 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
index 6d240b92cdda3..baef30b3bce94 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
@@ -11,9 +11,9 @@ define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -23,9 +23,9 @@ define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -35,9 +35,9 @@ define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -47,9 +47,9 @@ define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -63,9 +63,9 @@ define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -75,9 +75,9 @@ define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -87,9 +87,9 @@ define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -99,9 +99,9 @@ define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -113,9 +113,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x
 ; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -128,9 +128,9 @@ define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -161,9 +161,9 @@ define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    mul z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -173,9 +173,9 @@ define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    mul z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -185,9 +185,9 @@ define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    mul z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -197,9 +197,9 @@ define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    mul z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -213,9 +213,9 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 ; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -225,9 +225,9 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 ; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -237,9 +237,9 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -249,9 +249,9 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -265,9 +265,9 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 ; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -277,9 +277,9 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 ; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -289,9 +289,9 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -301,9 +301,9 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -315,9 +315,9 @@ define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale
 ; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                      <vscale x 4 x i32> %a,
-                                                                      <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -330,9 +330,9 @@ define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                      <vscale x 4 x i32> %a,
-                                                                      <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -363,9 +363,9 @@ define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -375,9 +375,9 @@ define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -387,9 +387,9 @@ define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -399,9 +399,9 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -415,9 +415,9 @@ define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -427,9 +427,9 @@ define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -439,9 +439,9 @@ define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -451,9 +451,9 @@ define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    bic z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -467,9 +467,9 @@ define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -479,9 +479,9 @@ define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -491,9 +491,9 @@ define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -503,9 +503,9 @@ define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -519,9 +519,9 @@ define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
-                                                               <vscale x 16 x i8> %a,
-                                                               <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -531,9 +531,9 @@ define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
-                                                               <vscale x 8 x i16> %a,
-                                                               <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x i16> %a,
+                                                                 <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -543,9 +543,9 @@ define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
-                                                               <vscale x 4 x i32> %a,
-                                                               <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x i32> %a,
+                                                                 <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -555,9 +555,9 @@ define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
-                                                               <vscale x 2 x i64> %a,
-                                                               <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x i64> %a,
+                                                                 <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -569,9 +569,9 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x
 ; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -584,9 +584,9 @@ define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -669,9 +669,9 @@ define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 ; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -681,9 +681,9 @@ define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 ; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -693,9 +693,9 @@ define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -705,9 +705,9 @@ define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -773,9 +773,9 @@ define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b
 ; CHECK-NEXT:    uqsub z0.b, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %out
 }
 
@@ -785,9 +785,9 @@ define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %
 ; CHECK-NEXT:    uqsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %out
 }
 
@@ -797,9 +797,9 @@ define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
 ; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -809,9 +809,9 @@ define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
 ; CHECK-NEXT:    uqsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
@@ -823,9 +823,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale
 ; CHECK-NEXT:    ret
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                      <vscale x 4 x i32> %a,
-                                                                      <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -838,9 +838,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale
   %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                      <vscale x 4 x i32> %a,
-                                                                      <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -1037,9 +1037,9 @@ define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half
 ; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x half> %a,
-                                                                 <vscale x 8 x half> %b)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %a,
+                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
 }
 
@@ -1049,9 +1049,9 @@ define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x f
 ; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
-                                                                  <vscale x 4 x float> %a,
-                                                                  <vscale x 4 x float> %b)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %a,
+                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
 }
 
@@ -1061,9 +1061,9 @@ define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2
 ; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
-                                                                   <vscale x 2 x double> %a,
-                                                                   <vscale x 2 x double> %b)
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %a,
+                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
 }
 
@@ -1077,9 +1077,9 @@ define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half
 ; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x half> %a,
-                                                                 <vscale x 8 x half> %b)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %a,
+                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
 }
 
@@ -1089,9 +1089,9 @@ define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x f
 ; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
-                                                                  <vscale x 4 x float> %a,
-                                                                  <vscale x 4 x float> %b)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %a,
+                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
 }
 
@@ -1101,9 +1101,9 @@ define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2
 ; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
-                                                                   <vscale x 2 x double> %a,
-                                                                   <vscale x 2 x double> %b)
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %a,
+                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
 }
 
@@ -1117,9 +1117,9 @@ define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half
 ; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x half> %a,
-                                                                 <vscale x 8 x half> %b)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %a,
+                                                                   <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
 }
 
@@ -1129,9 +1129,9 @@ define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x f
 ; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
-                                                                  <vscale x 4 x float> %a,
-                                                                  <vscale x 4 x float> %b)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %a,
+                                                                    <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
 }
 
@@ -1141,76 +1141,84 @@ define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2
 ; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
-                                                                   <vscale x 2 x double> %a,
-                                                                   <vscale x 2 x double> %b)
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %a,
+                                                                     <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
 }
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
 declare <vscale x 16 x  i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
 declare <vscale x  8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
 declare <vscale x  2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
 declare <vscale x 16 x  i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
 declare <vscale x  8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
 declare <vscale x  2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
 declare <vscale x 16 x  i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 2 x i64>)
 declare <vscale x  8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x 2 x i64>)
@@ -1224,17 +1232,17 @@ declare <vscale x 16 x  i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1
 declare <vscale x  8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x 2 x i64>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x 2 x i64>)
 
-declare <vscale x 8 x   half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
-declare <vscale x 4 x  float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x   half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
+declare <vscale x 4 x  float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
-declare <vscale x 8 x   half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
-declare <vscale x 4 x  float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x   half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
+declare <vscale x 4 x  float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
-declare <vscale x 8 x   half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
-declare <vscale x 4 x  float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x   half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
+declare <vscale x 4 x  float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)

diff  --git a/llvm/test/CodeGen/AArch64/sve-saba.ll b/llvm/test/CodeGen/AArch64/sve-saba.ll
index 0022f1ba3450e..43a580409ffe2 100644
--- a/llvm/test/CodeGen/AArch64/sve-saba.ll
+++ b/llvm/test/CodeGen/AArch64/sve-saba.ll
@@ -55,7 +55,7 @@ define <vscale x 2 x i64> @saba_sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64>
 ; CHECK-NEXT:    saba z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %sabd = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %true, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  %sabd = call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> %true, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   %add = add <vscale x 2 x i64> %sabd, %a
   ret <vscale x 2 x i64> %add
 }
@@ -66,7 +66,7 @@ define <vscale x 4 x i32> @saba_sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32>
 ; CHECK-NEXT:    saba z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %sabd = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %true, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  %sabd = call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> %true, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   %add = add <vscale x 4 x i32> %sabd, %a
   ret <vscale x 4 x i32> %add
 }
@@ -77,7 +77,7 @@ define <vscale x 8 x i16> @saba_sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16>
 ; CHECK-NEXT:    saba z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %sabd = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %true, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  %sabd = call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> %true, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   %add = add <vscale x 8 x i16> %sabd, %a
   ret <vscale x 8 x i16> %add
 }
@@ -88,7 +88,7 @@ define <vscale x 16 x i8> @saba_sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8>
 ; CHECK-NEXT:    saba z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %true = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %sabd = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %true, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  %sabd = call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> %true, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   %add = add <vscale x 16 x i8> %sabd, %a
   ret <vscale x 16 x i8> %add
 }
@@ -103,7 +103,7 @@ declare <vscale x  4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x  8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x  2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x  4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x  8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x  2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x  4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x  8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
index 0d1743d1b15b2..800888b7e6cb9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
@@ -132,8 +132,8 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a,
 ; CHECK-NEXT:    ret
                                     <vscale x 16 x i8> %b) {
   %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %res
 }
 
@@ -144,8 +144,8 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 8 x i16> %b) {
   %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %res
 }
 
@@ -156,8 +156,8 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 4 x i32> %b) {
   %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %res
 }
 
@@ -168,8 +168,8 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 2 x i64> %b) {
   %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %res
 }
 
@@ -183,8 +183,8 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a,
 ; CHECK-NEXT:    ret
                                     <vscale x 16 x i8> %b) {
   %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %b)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %res
 }
 
@@ -195,8 +195,8 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 8 x i16> %b) {
   %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %b)
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %res
 }
 
@@ -207,8 +207,8 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 4 x i32> %b) {
   %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %b)
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %res
 }
 
@@ -219,8 +219,8 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a,
 ; CHECK-NEXT:    ret
                                      <vscale x 2 x i64> %b) {
   %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %b)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %res
 }
 
@@ -336,14 +336,14 @@ declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
-declare <vscale x 16 x  i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
-declare <vscale x  8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
-declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
-declare <vscale x  2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 declare <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
index 6a8f53d7f31a3..8bfcd088e1a86 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-int-arith-imm.ll
@@ -111,9 +111,9 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -125,9 +125,9 @@ define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -139,9 +139,9 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -153,9 +153,9 @@ define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -167,9 +167,9 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -181,9 +181,9 @@ define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -195,9 +195,9 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -311,9 +311,9 @@ define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
   %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
-  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
-                                                                 <vscale x 16 x i8> %a,
-                                                                 <vscale x 16 x i8> %splat)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                   <vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
   ret <vscale x 16 x i8> %out
 }
 
@@ -325,9 +325,9 @@ define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -339,9 +339,9 @@ define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
-  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
-                                                                 <vscale x 8 x i16> %a,
-                                                                 <vscale x 8 x i16> %splat)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
   ret <vscale x 8 x i16> %out
 }
 
@@ -353,9 +353,9 @@ define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -367,9 +367,9 @@ define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
-  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
-                                                                 <vscale x 4 x i32> %a,
-                                                                 <vscale x 4 x i32> %splat)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
   ret <vscale x 4 x i32> %out
 }
 
@@ -381,9 +381,9 @@ define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -395,9 +395,9 @@ define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
-  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
-                                                                 <vscale x 2 x i64> %a,
-                                                                 <vscale x 2 x i64> %splat)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
   ret <vscale x 2 x i64> %out
 }
 
@@ -410,9 +410,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                      <vscale x 4 x i32> %a,
-                                                                      <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -426,9 +426,9 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
   %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
-  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                        <vscale x 4 x i32> %a,
+                                                                        <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -446,8 +446,8 @@ define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
   %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
   %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
   %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
-                                                                    <vscale x 4 x i32> %a,
-                                                                    <vscale x 4 x i32> %b)
+                                                                      <vscale x 4 x i32> %a,
+                                                                      <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %out
 }
 
@@ -456,20 +456,22 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <v
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-sra.ll b/llvm/test/CodeGen/AArch64/sve2-sra.ll
index afa8dd58f9cc7..3de49c9b8768b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-sra.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-sra.ll
@@ -61,7 +61,7 @@ define <vscale x 16 x i8> @usra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
   %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
   %add = add <vscale x 16 x i8> %a, %shift
   ret <vscale x 16 x i8> %add
 }
@@ -74,7 +74,7 @@ define <vscale x 8 x i16> @usra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
   %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
   %add = add <vscale x 8 x i16> %a, %shift
   ret <vscale x 8 x i16> %add
 }
@@ -87,7 +87,7 @@ define <vscale x 4 x i32> @usra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i3
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
   %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
   %add = add <vscale x 4 x i32> %a, %shift
   ret <vscale x 4 x i32> %add
 }
@@ -100,7 +100,7 @@ define <vscale x 2 x i64> @usra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i6
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
   %add = add <vscale x 2 x i64> %a, %shift
   ret <vscale x 2 x i64> %add
 }
@@ -211,7 +211,7 @@ define <vscale x 16 x i8> @ssra_intr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8
   %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %ins = insertelement <vscale x 16 x i8> poison, i8 1, i32 0
   %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %splat)
   %add = add <vscale x 16 x i8> %a, %shift
   ret <vscale x 16 x i8> %add
 }
@@ -224,7 +224,7 @@ define <vscale x 8 x i16> @ssra_intr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1
   %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %ins = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
   %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, <vscale x 8 x i16> %splat)
   %add = add <vscale x 8 x i16> %a, %shift
   ret <vscale x 8 x i16> %add
 }
@@ -237,7 +237,7 @@ define <vscale x 4 x i32> @ssra_intr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i3
   %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %ins = insertelement <vscale x 4 x i32> poison, i32 3, i32 0
   %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %splat)
   %add = add <vscale x 4 x i32> %a, %shift
   ret <vscale x 4 x i32> %add
 }
@@ -250,7 +250,7 @@ define <vscale x 2 x i64> @ssra_intr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i6
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %ins = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
   %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %splat)
   %add = add <vscale x 2 x i64> %a, %shift
   ret <vscale x 2 x i64> %add
 }
@@ -308,21 +308,11 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)


        

