[llvm] 8dc89e3 - [LLVM][InstCombine] Enable constant folding for SVE asr,lsl and lsr intrinsics. (#137350)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 30 05:21:50 PDT 2025


Author: Paul Walker
Date: 2025-04-30T13:21:46+01:00
New Revision: 8dc89e34199b6b1d0f5ea1f9940ccc8e957eef0a

URL: https://github.com/llvm/llvm-project/commit/8dc89e34199b6b1d0f5ea1f9940ccc8e957eef0a
DIFF: https://github.com/llvm/llvm-project/commit/8dc89e34199b6b1d0f5ea1f9940ccc8e957eef0a.diff

LOG: [LLVM][InstCombine] Enable constant folding for SVE asr,lsl and lsr intrinsics. (#137350)

The SVE shift intrinsics accept shift amounts greater than or equal to the
element type's bit length, effectively saturating the shift amount at the
bit length. The equivalent IR instructions, however, treat such shifts as
undefined and produce poison. To account for this we now discard any
simplification whose result is poison. This lets the existing code be
reused to simplify the shifts, but it does mean:

1) We don't simplify cases like "svlsl_s32(x, splat(32)) => 0".
2) We no longer constant fold cases like "svadd(poison, X) => poison".

For (1) we would need dedicated target-specific combines anyway, and the
result of (2) is not specified by the ACLE, so replicating LLVM IR
behaviour might confuse ACLE users.
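
To illustrate point (1), a minimal sketch of the ACLE behaviour being
preserved, assuming a compiler with SVE enabled, the standard <arm_sve.h>
intrinsic naming (e.g. svlsl_n_s32_x), and an illustrative function name:

    #include <arm_sve.h>

    // Per the ACLE, a left shift by an amount >= the element width saturates,
    // so every active lane of the result is a well-defined 0. The matching IR
    // instruction (shl of an i32 by 32) yields poison instead, which is why
    // such a simplification must now be discarded rather than folded.
    svint32_t shift_by_element_width(svbool_t pg, svint32_t x) {
      return svlsl_n_s32_x(pg, x, 32);
    }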

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fcc5eb1c05ba0..7b1d203560a27 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1327,11 +1327,14 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_umulh:
     return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
   case Intrinsic::aarch64_sve_asr:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u)
+        .setMatchingIROpcode(Instruction::AShr);
   case Intrinsic::aarch64_sve_lsl:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u)
+        .setMatchingIROpcode(Instruction::Shl);
   case Intrinsic::aarch64_sve_lsr:
-    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u)
+        .setMatchingIROpcode(Instruction::LShr);
   case Intrinsic::aarch64_sve_and:
     return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u)
         .setMatchingIROpcode(Instruction::And);
@@ -1354,6 +1357,9 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_and_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::And);
+  case Intrinsic::aarch64_sve_asr_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::AShr);
   case Intrinsic::aarch64_sve_eor_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::Xor);
@@ -1369,6 +1375,12 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
   case Intrinsic::aarch64_sve_fsub_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::FSub);
+  case Intrinsic::aarch64_sve_lsl_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::Shl);
+  case Intrinsic::aarch64_sve_lsr_u:
+    return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
+        Instruction::LShr);
   case Intrinsic::aarch64_sve_mul_u:
     return SVEIntrinsicInfo::defaultUndefOp().setMatchingIROpcode(
         Instruction::Mul);
@@ -1571,7 +1583,11 @@ simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II,
   else
     SimpleII = simplifyBinOp(Opc, Op1, Op2, DL);
 
-  if (!SimpleII)
+  // An SVE intrinsic's result is always defined. However, this is not the case
+  // for its equivalent IR instruction (e.g. when shifting by an amount more
+  // than the data's bitwidth). Simplifications to an undefined result must be
+  // ignored to preserve the intrinsic's expected behaviour.
+  if (!SimpleII || isa<UndefValue>(SimpleII))
     return std::nullopt;
 
   if (IInfo.inactiveLanesAreNotDefined())

diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
index bec5596907063..3aea04b702f4d 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
@@ -107,7 +107,8 @@ define <vscale x 4 x i32> @constant_mul_u_after_striping_inactive_lanes(<vscale
 define <vscale x 4 x i32> @dont_propagate_poison(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @dont_propagate_poison(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 1))
   ret <vscale x 4 x i32> %r

diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
index 400dc0dc8f8ee..39e79573628c3 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -16,7 +15,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -32), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -27,7 +26,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_asr_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 -128)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
@@ -50,7 +49,7 @@ define <vscale x 16 x i8> @constant_asr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0
 define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_asr_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -1), <vscale x 8 x i16> splat (i16 -32768)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
@@ -73,7 +72,7 @@ define <vscale x 8 x i16> @constant_asr_i16_shift_by_16(<vscale x 8 x i1> %pg) #
 define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_asr_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -1), <vscale x 4 x i32> splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
@@ -96,7 +95,7 @@ define <vscale x 4 x i32> @constant_asr_i32_shift_by_32(<vscale x 4 x i1> %pg) #
 define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_asr_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -1), <vscale x 2 x i64> splat (i64 -9223372036854775808)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
@@ -118,8 +117,7 @@ define <vscale x 2 x i64> @constant_asr_i64_shift_by_64(<vscale x 2 x i1> %pg) #
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -128,7 +126,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_0(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -126), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -139,7 +137,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_1(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsl_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 1)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 7))
@@ -161,7 +159,7 @@ define <vscale x 16 x i8> @constant_lsl_i8_shift_by_8(<vscale x 16 x i1> %pg) #0
 define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsl_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 1)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 15))
@@ -183,7 +181,7 @@ define <vscale x 8 x i16> @constant_lsl_i16_shift_by_16(<vscale x 8 x i1> %pg) #
 define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsl_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 1)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 31))
@@ -205,7 +203,7 @@ define <vscale x 4 x i32> @constant_lsl_i32_shift_by_32(<vscale x 4 x i1> %pg) #
 define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsl_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 1)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 63))
@@ -226,8 +224,7 @@ define <vscale x 2 x i64> @constant_lsl_i64_shift_by_64(<vscale x 2 x i1> %pg) #
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
+; CHECK-NEXT:    ret <vscale x 16 x i8> splat (i8 7)
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 7), <vscale x 16 x i8> splat (i8 0))
   ret <vscale x 16 x i8> %r
@@ -236,7 +233,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_0(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -63), <vscale x 16 x i8> splat (i8 1))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 96), <vscale x 16 x i8> splat (i8 -63)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 193), <vscale x 16 x i8> splat (i8 1))
@@ -247,7 +244,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_1(<vscale x 16 x i1> %pg) #0
 define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(<vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 16 x i8> @constant_lsr_i8_shift_by_7(
 ; CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 -128), <vscale x 16 x i8> splat (i8 7))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i8> splat (i8 -128)
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[R]]
 ;
   %r = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> splat (i8 128), <vscale x 16 x i8> splat (i8 7))
@@ -270,7 +267,7 @@ define <vscale x 16 x i8> @constant_lsr_i8_shift_by_8(<vscale x 16 x i1> %pg) #0
 define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(<vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 8 x i16> @constant_lsr_i16_shift_by_15(
 ; CHECK-SAME: <vscale x 8 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 -32768), <vscale x 8 x i16> splat (i16 15))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> splat (i16 1), <vscale x 8 x i16> splat (i16 -32768)
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[R]]
 ;
   %r = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> splat (i16 32768), <vscale x 8 x i16> splat (i16 15))
@@ -293,7 +290,7 @@ define <vscale x 8 x i16> @constant_lsr_i16_shift_by_16(<vscale x 8 x i1> %pg) #
 define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(<vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 4 x i32> @constant_lsr_i32_shift_by_31(
 ; CHECK-SAME: <vscale x 4 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 -2147483648), <vscale x 4 x i32> splat (i32 31))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> splat (i32 1), <vscale x 4 x i32> splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
 ;
   %r = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> splat (i32 2147483648), <vscale x 4 x i32> splat (i32 31))
@@ -316,7 +313,7 @@ define <vscale x 4 x i32> @constant_lsr_i32_shift_by_32(<vscale x 4 x i1> %pg) #
 define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(<vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: define <vscale x 2 x i64> @constant_lsr_i64_shift_by_63(
 ; CHECK-SAME: <vscale x 2 x i1> [[PG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 -9223372036854775808), <vscale x 2 x i64> splat (i64 63))
+; CHECK-NEXT:    [[R:%.*]] = select <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> splat (i64 1), <vscale x 2 x i64> splat (i64 -9223372036854775808)
 ; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
 ;
   %r = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> splat (i64 9223372036854775808), <vscale x 2 x i64> splat (i64 63))
