[llvm] 9ff2ef9 - [AArch64][SVE] Define pseudos for arithmetic immediate instructions. (#188579)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 03:07:51 PDT 2026
Author: Ricardo Jesus
Date: 2026-04-02T11:07:46+01:00
New Revision: 9ff2ef97111bf3b281b1b8950ddfa7ba4d01c281
URL: https://github.com/llvm/llvm-project/commit/9ff2ef97111bf3b281b1b8950ddfa7ba4d01c281
DIFF: https://github.com/llvm/llvm-project/commit/9ff2ef97111bf3b281b1b8950ddfa7ba4d01c281.diff
LOG: [AArch64][SVE] Define pseudos for arithmetic immediate instructions. (#188579)
This patch uses the DestructiveBinaryShImmUnpred operand type (which was
previously unused, as far as I could tell) to define pseudos for arithmetic
immediate instructions such as ADD (immediate), allowing MOVPRFX to be
used with these instructions.
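
To illustrate the effect, here is a minimal before/after sketch taken from
the sve-stepvector.ll test update below. With the pseudo in place, the
pseudo-expansion pass can emit MOVPRFX instead of a plain register copy,
which implementations may combine with the following destructive operation
into a single constructive operation:

    // Previously: a full vector copy followed by the destructive op.
    mov     z1.d, z0.d
    subr    z1.h, z1.h, #2

    // Now: a MOVPRFX-prefixed destructive op.
    movprfx z1, z0
    subr    z1.h, z1.h, #2
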
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
llvm/test/CodeGen/AArch64/sve-mask-partition.ll
llvm/test/CodeGen/AArch64/sve-stepvector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
llvm/test/CodeGen/AArch64/sve2-bsl.ll
llvm/test/CodeGen/AArch64/sve2-rsh.ll
llvm/test/CodeGen/AArch64/sve2-xar.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4ab8adeb2c9bc..638a3a59b983e 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -537,6 +537,9 @@ bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
// ==> MOVPRFX Zd Zs; EXT_ZZI Zd, Zd, Zs, Imm
std::tie(DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 1, 2);
break;
+ case AArch64::DestructiveBinaryShImmUnpred:
+ std::tie(DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3);
+ break;
default:
llvm_unreachable("Unsupported Destructive Operand type");
}
@@ -557,6 +560,7 @@ bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
break;
case AArch64::DestructiveUnaryPassthru:
case AArch64::DestructiveBinaryImm:
+ case AArch64::DestructiveBinaryShImmUnpred:
case AArch64::Destructive2xRegImmUnpred:
DOPRegIsUnique = true;
break;
@@ -684,6 +688,7 @@ bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
.add(MI.getOperand(SrcIdx))
.add(MI.getOperand(Src2Idx));
break;
+ case AArch64::DestructiveBinaryShImmUnpred:
case AArch64::Destructive2xRegImmUnpred:
DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
.add(MI.getOperand(SrcIdx))
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 8c0dd4381fae8..858214464ef33 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1192,6 +1192,9 @@ bool AArch64RegisterInfo::getRegAllocationHints(
case AArch64::DestructiveUnaryPassthru:
AddHintIfSuitable(R, Def.getOperand(3));
break;
+ case AArch64::DestructiveBinaryShImmUnpred:
+ AddHintIfSuitable(R, Def.getOperand(1));
+ break;
}
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c5a3bd504adf9..da6d2d0dbfb7a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4590,16 +4590,16 @@ let Predicates = [HasSVE_or_SME] in {
}
// Arith operations: ADD
- def : PromoteNEONToSVEImmWithShift<add, v2i64, NEONSplatOfSVEAddSubImm, ADD_ZI_D>;
- def : PromoteNEONToSVEImmWithShift<add, v4i32, NEONSplatOfSVEAddSubImm, ADD_ZI_S>;
- def : PromoteNEONToSVEImmWithShift<add, v8i16, NEONSplatOfSVEAddSubImm, ADD_ZI_H>;
- def : PromoteNEONToSVEImmWithShift<add, v16i8, NEONSplatOfSVEAddSubImm, ADD_ZI_B>;
+ def : PromoteNEONToSVEImmWithShift<add, v2i64, NEONSplatOfSVEAddSubImm, ADD_ZI_D_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<add, v4i32, NEONSplatOfSVEAddSubImm, ADD_ZI_S_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<add, v8i16, NEONSplatOfSVEAddSubImm, ADD_ZI_H_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<add, v16i8, NEONSplatOfSVEAddSubImm, ADD_ZI_B_PSEUDO>;
// Arith operations: SUB
- def : PromoteNEONToSVEImmWithShift<sub, v2i64, NEONSplatOfSVEAddSubImm, SUB_ZI_D>;
- def : PromoteNEONToSVEImmWithShift<sub, v4i32, NEONSplatOfSVEAddSubImm, SUB_ZI_S>;
- def : PromoteNEONToSVEImmWithShift<sub, v8i16, NEONSplatOfSVEAddSubImm, SUB_ZI_H>;
- def : PromoteNEONToSVEImmWithShift<sub, v16i8, NEONSplatOfSVEAddSubImm, SUB_ZI_B>;
+ def : PromoteNEONToSVEImmWithShift<sub, v2i64, NEONSplatOfSVEAddSubImm, SUB_ZI_D_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<sub, v4i32, NEONSplatOfSVEAddSubImm, SUB_ZI_S_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<sub, v8i16, NEONSplatOfSVEAddSubImm, SUB_ZI_H_PSEUDO>;
+ def : PromoteNEONToSVEImmWithShift<sub, v16i8, NEONSplatOfSVEAddSubImm, SUB_ZI_B_PSEUDO>;
// Arith operations: MUL
def : PromoteNEONToSVEImm<mul, v2i64, NEONSplatOfSVEAddSubImm, MUL_ZI_D, i32>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index a279e28cf8da5..d333dfcc50d33 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1504,7 +1504,7 @@ def : InstRW<[V1Write_2c_1V01],
"^ADR_LSL_ZZZ_[SD]_[0123]$",
"^[SU]ABD_ZP[mZ]Z_[BHSD]",
"^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]",
- "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
+ "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]",
"^SUBR_Z(I|P[mZ]Z)_[BHSD]",
"^(AND|EOR|ORR)_ZI$",
"^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ",
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8a3f52090ab4c..2004aa43e0667 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -827,6 +827,13 @@ class UnpredRegImmPseudo<ZPRRegOp zprty, Operand immty>
Pseudo<(outs zprty:$Zd), (ins zprty:$Zs, immty:$imm), []> {
}
+class UnpredTwoOpImmPseudo<string name, ZPRRegOp zprty, Operand immty>
+: SVEPseudo2Instr<name, 0>,
+ Pseudo<(outs zprty:$Zd), (ins zprty:$Zs1, immty:$imm), []> {
+ // There are no false lanes. Setting this helps register allocation hints.
+ let FalseLanes = FalseLanesUndef;
+}
+
//
// Pseudos for passthru operands
//
@@ -5275,7 +5282,7 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $_Zdn, $imm",
"",
- []>, Sched<[]> {
+ []>, SVEPseudo2Instr<NAME, 1>, Sched<[]> {
bits<5> Zdn;
bits<9> imm;
let Inst{31-24} = 0b00100101;
@@ -5288,7 +5295,7 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveBinaryShImmUnpred;
let ElementSize = ElementSizeNone;
let hasSideEffects = 0;
}
@@ -5300,16 +5307,21 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op,
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def _B_PSEUDO : UnpredTwoOpImmPseudo<NAME # _B, ZPR8, addsub_imm8_opt_lsl_i8>;
+ def _H_PSEUDO : UnpredTwoOpImmPseudo<NAME # _H, ZPR16, addsub_imm8_opt_lsl_i16>;
+ def _S_PSEUDO : UnpredTwoOpImmPseudo<NAME # _S, ZPR32, addsub_imm8_opt_lsl_i32>;
+ def _D_PSEUDO : UnpredTwoOpImmPseudo<NAME # _D, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubImm8Pat, !cast<Pseudo>(NAME # _B_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Pseudo>(NAME # _H_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Pseudo>(NAME # _S_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Pseudo>(NAME # _D_PSEUDO)>;
// Extra patterns for add(x, splat(-ve)) -> sub(x, +ve). There is no i8
// pattern as all i8 constants can be handled by an add.
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubNegImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubNegImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubNegImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubNegImm16Pat, !cast<Pseudo>(NAME # _H_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubNegImm32Pat, !cast<Pseudo>(NAME # _S_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubNegImm64Pat, !cast<Pseudo>(NAME # _D_PSEUDO)>;
}
multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op,
@@ -5319,15 +5331,20 @@ multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op
def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubSSatPosImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def _B_PSEUDO : UnpredTwoOpImmPseudo<NAME # _B, ZPR8, addsub_imm8_opt_lsl_i8>;
+ def _H_PSEUDO : UnpredTwoOpImmPseudo<NAME # _H, ZPR16, addsub_imm8_opt_lsl_i16>;
+ def _S_PSEUDO : UnpredTwoOpImmPseudo<NAME # _S, ZPR32, addsub_imm8_opt_lsl_i32>;
+ def _D_PSEUDO : UnpredTwoOpImmPseudo<NAME # _D, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8, i32, SVEAddSubSSatPosImm8Pat, !cast<Pseudo>(NAME # _B_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatPosImm16Pat, !cast<Pseudo>(NAME # _H_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatPosImm32Pat, !cast<Pseudo>(NAME # _S_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatPosImm64Pat, !cast<Pseudo>(NAME # _D_PSEUDO)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8, i32, SVEAddSubSSatNegImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, inv_op, ZPR8, i32, SVEAddSubSSatNegImm8Pat, !cast<Pseudo>(NAME # _B_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, inv_op, ZPR16, i32, SVEAddSubSSatNegImm16Pat, !cast<Pseudo>(NAME # _H_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, inv_op, ZPR32, i32, SVEAddSubSSatNegImm32Pat, !cast<Pseudo>(NAME # _S_PSEUDO)>;
+ def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, inv_op, ZPR64, i64, SVEAddSubSSatNegImm64Pat, !cast<Pseudo>(NAME # _D_PSEUDO)>;
}
class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
index f0abbaac2e68c..b9ffab24d86b0 100644
--- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -1120,14 +1120,14 @@ define <vscale x 4 x i64> @fshl_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x
define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
; CHECK-LABEL: fshl_rot_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
+; CHECK-NEXT: and z1.d, z1.d, #0x3f
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z2.d, z2.d, #0x3f
-; CHECK-NEXT: and z1.d, z1.d, #0x3f
-; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: lslr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: ret
%fshl = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %fshl
@@ -1137,21 +1137,21 @@ define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64
define <vscale x 4 x i64> @fshl_rot_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){
; CHECK-LABEL: fshl_rot_illegal_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z4.d, z2.d
-; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
-; CHECK-NEXT: mov z5.d, z3.d
-; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0
+; CHECK-NEXT: movprfx z4, z2
+; CHECK-NEXT: subr z4.d, z4.d, #0 // =0x0
+; CHECK-NEXT: movprfx z5, z3
+; CHECK-NEXT: subr z5.d, z5.d, #0 // =0x0
+; CHECK-NEXT: and z2.d, z2.d, #0x3f
+; CHECK-NEXT: and z3.d, z3.d, #0x3f
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z4.d, z4.d, #0x3f
-; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: and z5.d, z5.d, #0x3f
-; CHECK-NEXT: and z3.d, z3.d, #0x3f
-; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d
-; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: lslr z5.d, p0/m, z5.d, z1.d
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: orr z0.d, z4.d, z0.d
-; CHECK-NEXT: orr z1.d, z5.d, z1.d
+; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: lslr z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z4.d
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z5.d
+; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: orr z1.d, z3.d, z1.d
; CHECK-NEXT: ret
%fshl = call <vscale x 4 x i64> @llvm.fshl.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b)
ret <vscale x 4 x i64> %fshl
@@ -1188,14 +1188,14 @@ define <vscale x 2 x i64> @fshr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
define <vscale x 2 x i64> @fshr_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
; CHECK-LABEL: fshr_rot_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, z1.d
-; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
+; CHECK-NEXT: and z1.d, z1.d, #0x3f
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z2.d, z2.d, #0x3f
-; CHECK-NEXT: and z1.d, z1.d, #0x3f
-; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: lsrr z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: ret
%fshr = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %fshr
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
index 258e399018ba8..d3a68ded5fe79 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
@@ -15,12 +15,12 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
-; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2]
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
-; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
-; CHECK-NEXT: and z0.d, z0.d, z3.d
-; CHECK-NEXT: and z1.d, z2.d, z1.d
+; CHECK-NEXT: and z1.d, z3.d, z1.d
+; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
index 9aa673ee6cce8..f561e2fd4b350 100644
--- a/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
+++ b/llvm/test/CodeGen/AArch64/sve-mask-partition.ll
@@ -306,88 +306,89 @@ define <32 x i1> @mask_exclude_active_v32(<32 x i1> %mask.in) {
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: cmpne p2.b, p0/z, z1.b, #0
; CHECK-NEXT: brkb p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z6.d, z0.d
-; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: add z1.d, z1.d, #14 // =0xe
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: add z2.d, z2.d, #12 // =0xc
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: add z3.d, z3.d, #10 // =0xa
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: add z5.d, z5.d, #6 // =0x6
; CHECK-NEXT: brkb p0.b, p0/z, p2.b
-; CHECK-NEXT: mov z5.d, z0.d
-; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: movprfx z6, z0
+; CHECK-NEXT: add z6.d, z6.d, #4 // =0x4
+; CHECK-NEXT: movprfx z7, z0
+; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
; CHECK-NEXT: cntp x9, p1, p1.b
-; CHECK-NEXT: mov z16.d, z0.d
-; CHECK-NEXT: mov z17.d, z0.d
+; CHECK-NEXT: movprfx z17, z0
+; CHECK-NEXT: add z17.d, z17.d, #30 // =0x1e
+; CHECK-NEXT: movprfx z18, z0
+; CHECK-NEXT: add z18.d, z18.d, #28 // =0x1c
; CHECK-NEXT: cntp x10, p0, p0.b
-; CHECK-NEXT: mov z18.d, z0.d
-; CHECK-NEXT: mov z19.d, z0.d
-; CHECK-NEXT: mov z20.d, z0.d
-; CHECK-NEXT: mov z21.d, z0.d
-; CHECK-NEXT: mov z22.d, z0.d
-; CHECK-NEXT: add z1.d, z1.d, #14 // =0xe
-; CHECK-NEXT: add z3.d, z3.d, #12 // =0xc
-; CHECK-NEXT: add z6.d, z6.d, #10 // =0xa
+; CHECK-NEXT: movprfx z19, z0
+; CHECK-NEXT: add z19.d, z19.d, #26 // =0x1a
+; CHECK-NEXT: movprfx z20, z0
+; CHECK-NEXT: add z20.d, z20.d, #24 // =0x18
+; CHECK-NEXT: movprfx z21, z0
+; CHECK-NEXT: add z21.d, z21.d, #22 // =0x16
+; CHECK-NEXT: movprfx z22, z0
+; CHECK-NEXT: add z22.d, z22.d, #20 // =0x14
+; CHECK-NEXT: movprfx z23, z0
+; CHECK-NEXT: add z23.d, z23.d, #18 // =0x12
+; CHECK-NEXT: movprfx z24, z0
+; CHECK-NEXT: add z24.d, z24.d, #16 // =0x10
; CHECK-NEXT: cmp x9, #16
-; CHECK-NEXT: add z4.d, z4.d, #8 // =0x8
-; CHECK-NEXT: add z2.d, z2.d, #6 // =0x6
; CHECK-NEXT: add x10, x10, #16
-; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4
-; CHECK-NEXT: add z7.d, z7.d, #2 // =0x2
; CHECK-NEXT: csel x9, x9, x10, ne
-; CHECK-NEXT: add z16.d, z16.d, #30 // =0x1e
-; CHECK-NEXT: add z17.d, z17.d, #28 // =0x1c
-; CHECK-NEXT: dup v23.2d, x9
-; CHECK-NEXT: add z18.d, z18.d, #26 // =0x1a
-; CHECK-NEXT: add z19.d, z19.d, #24 // =0x18
-; CHECK-NEXT: add z20.d, z20.d, #22 // =0x16
-; CHECK-NEXT: add z21.d, z21.d, #20 // =0x14
-; CHECK-NEXT: add z22.d, z22.d, #18 // =0x12
+; CHECK-NEXT: dup v16.2d, x9
; CHECK-NEXT: adrp x9, .LCPI17_0
-; CHECK-NEXT: cmhi v24.2d, v23.2d, v0.2d
-; CHECK-NEXT: add z0.d, z0.d, #16 // =0x10
-; CHECK-NEXT: cmhi v1.2d, v23.2d, v1.2d
-; CHECK-NEXT: cmhi v3.2d, v23.2d, v3.2d
-; CHECK-NEXT: cmhi v6.2d, v23.2d, v6.2d
-; CHECK-NEXT: cmhi v4.2d, v23.2d, v4.2d
-; CHECK-NEXT: cmhi v16.2d, v23.2d, v16.2d
-; CHECK-NEXT: cmhi v17.2d, v23.2d, v17.2d
-; CHECK-NEXT: cmhi v18.2d, v23.2d, v18.2d
-; CHECK-NEXT: cmhi v19.2d, v23.2d, v19.2d
-; CHECK-NEXT: cmhi v20.2d, v23.2d, v20.2d
-; CHECK-NEXT: cmhi v21.2d, v23.2d, v21.2d
-; CHECK-NEXT: cmhi v22.2d, v23.2d, v22.2d
-; CHECK-NEXT: cmhi v0.2d, v23.2d, v0.2d
-; CHECK-NEXT: cmhi v2.2d, v23.2d, v2.2d
-; CHECK-NEXT: cmhi v5.2d, v23.2d, v5.2d
-; CHECK-NEXT: cmhi v7.2d, v23.2d, v7.2d
-; CHECK-NEXT: uzp1 v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: uzp1 v3.4s, v17.4s, v16.4s
-; CHECK-NEXT: uzp1 v16.4s, v19.4s, v18.4s
-; CHECK-NEXT: uzp1 v17.4s, v21.4s, v20.4s
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v22.4s
-; CHECK-NEXT: uzp1 v4.4s, v4.4s, v6.4s
-; CHECK-NEXT: uzp1 v2.4s, v5.4s, v2.4s
-; CHECK-NEXT: uzp1 v5.4s, v24.4s, v7.4s
-; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v17.8h
-; CHECK-NEXT: uzp1 v1.8h, v4.8h, v1.8h
-; CHECK-NEXT: uzp1 v2.8h, v5.8h, v2.8h
-; CHECK-NEXT: uzp1 v0.16b, v0.16b, v3.16b
-; CHECK-NEXT: uzp1 v1.16b, v2.16b, v1.16b
+; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d
+; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d
+; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d
+; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d
+; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d
+; CHECK-NEXT: cmhi v17.2d, v16.2d, v17.2d
+; CHECK-NEXT: cmhi v18.2d, v16.2d, v18.2d
+; CHECK-NEXT: cmhi v19.2d, v16.2d, v19.2d
+; CHECK-NEXT: cmhi v20.2d, v16.2d, v20.2d
+; CHECK-NEXT: cmhi v21.2d, v16.2d, v21.2d
+; CHECK-NEXT: cmhi v22.2d, v16.2d, v22.2d
+; CHECK-NEXT: cmhi v23.2d, v16.2d, v23.2d
+; CHECK-NEXT: cmhi v24.2d, v16.2d, v24.2d
+; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d
+; CHECK-NEXT: cmhi v7.2d, v16.2d, v7.2d
+; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d
+; CHECK-NEXT: uzp1 v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uzp1 v2.4s, v18.4s, v17.4s
+; CHECK-NEXT: uzp1 v16.4s, v20.4s, v19.4s
+; CHECK-NEXT: uzp1 v17.4s, v22.4s, v21.4s
+; CHECK-NEXT: uzp1 v3.4s, v4.4s, v3.4s
+; CHECK-NEXT: uzp1 v18.4s, v24.4s, v23.4s
+; CHECK-NEXT: uzp1 v4.4s, v6.4s, v5.4s
+; CHECK-NEXT: uzp1 v0.4s, v0.4s, v7.4s
+; CHECK-NEXT: uzp1 v2.8h, v16.8h, v2.8h
+; CHECK-NEXT: uzp1 v1.8h, v3.8h, v1.8h
+; CHECK-NEXT: uzp1 v5.8h, v18.8h, v17.8h
+; CHECK-NEXT: uzp1 v0.8h, v0.8h, v4.8h
+; CHECK-NEXT: uzp1 v2.16b, v5.16b, v2.16b
+; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: shl v1.16b, v2.16b, #7
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_0]
; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: shl v1.16b, v1.16b, #7
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: zip1 v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: zip1 v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v1.16b, v1.16b, v2.16b
+; CHECK-NEXT: zip1 v0.16b, v0.16b, v3.16b
; CHECK-NEXT: addv h1, v1.8h
-; CHECK-NEXT: str h0, [x8, #2]
-; CHECK-NEXT: str h1, [x8]
+; CHECK-NEXT: addv h0, v0.8h
+; CHECK-NEXT: str h1, [x8, #2]
+; CHECK-NEXT: str h0, [x8]
; CHECK-NEXT: ret
%tz.elts = call i64 @llvm.experimental.cttz.elts.i64.v32i1(<32 x i1> %mask.in, i1 false)
%mask.out = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 0, i64 %tz.elts)
diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
index 91c4659997789..3cff76f4d81ad 100644
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -338,7 +338,7 @@ define <vscale x 8 x i16> @sub_multiple_use_stepvector_nxv8i16() {
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #1
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: subr z1.h, z1.h, #2 // =0x2
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index 03d8ed868e6a4..a561c884c189f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -17,18 +17,18 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ldp q5, q4, [x1]
; CHECK-NEXT: ldp q6, q7, [x2]
-; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: mov z3.d, z1.d
-; CHECK-NEXT: sub z1.s, z1.s, #1 // =0x1
-; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: movprfx z2, z1
; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
+; CHECK-NEXT: movprfx z3, z0
; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
-; CHECK-NEXT: and z2.d, z2.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: sub z1.s, z1.s, #1 // =0x1
+; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
+; CHECK-NEXT: and z3.d, z3.d, z4.d
+; CHECK-NEXT: and z2.d, z2.d, z5.d
; CHECK-NEXT: and z4.d, z0.d, z7.d
; CHECK-NEXT: and z0.d, z1.d, z6.d
-; CHECK-NEXT: orr z1.d, z4.d, z2.d
-; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: orr z1.d, z4.d, z3.d
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 07072168ff089..86e8d82f61aa9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -25,9 +25,9 @@ define void @build_vector_7_inc1_v32i8(ptr %a) {
; CHECK-LABEL: build_vector_7_inc1_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
+; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17
+; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
@@ -47,9 +47,9 @@ define void @build_vector_0_inc2_v16i16(ptr %a) {
; CHECK-LABEL: build_vector_0_inc2_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #2
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10
-; CHECK-NEXT: str q0, [x0, #16]
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: add z1.h, z1.h, #16 // =0x10
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16i16:
@@ -69,9 +69,9 @@ define void @build_vector_0_dec3_v8i32(ptr %a) {
; CHECK-LABEL: build_vector_0_dec3_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #-3
-; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: sub z0.s, z0.s, #12 // =0xc
-; CHECK-NEXT: str q0, [x0, #16]
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: sub z1.s, z1.s, #12 // =0xc
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32:
@@ -92,9 +92,9 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
; CHECK-NEXT: index z0.d, #0, x8
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
+; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sub z1.d, z1.d, #66 // =0x42
+; CHECK-NEXT: sub z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
index 2348140f58b73..5a9e8796f9f09 100644
--- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll
@@ -16,9 +16,10 @@ define <vscale x 4 x i32> @bsl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
define <vscale x 4 x i32> @bsl_add_sub(<vscale x 4 x i32> %pre_cond, <vscale x 4 x i32> %left, <vscale x 4 x i32> %right) #0 {
; CHECK-LABEL: bsl_add_sub:
; CHECK: // %bb.0:
-; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: bsl z0.d, z0.d, z2.d, z3.d
; CHECK-NEXT: ret
%neg_cond = sub <vscale x 4 x i32> zeroinitializer, %pre_cond
%min_cond = add <vscale x 4 x i32> %pre_cond, splat(i32 -1)
diff --git a/llvm/test/CodeGen/AArch64/sve2-rsh.ll b/llvm/test/CodeGen/AArch64/sve2-rsh.ll
index 4ab0e025ceffc..d6c3fb92f99e6 100644
--- a/llvm/test/CodeGen/AArch64/sve2-rsh.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-rsh.ll
@@ -43,7 +43,7 @@ define <vscale x 2 x i64> @neg_urshr_3(<vscale x 2 x i64> %x, <vscale x 2 x i64>
define <vscale x 2 x i64> @neg_urshr_4(<vscale x 2 x i64> %x, ptr %p) {
; CHECK-LABEL: neg_urshr_4:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: add z1.d, z1.d, #32 // =0x20
; CHECK-NEXT: lsr z0.d, z1.d, #6
; CHECK-NEXT: str z1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll
index e72745e551dbb..8af80c6edf8b6 100644
--- a/llvm/test/CodeGen/AArch64/sve2-xar.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -151,15 +151,15 @@ define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i
define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
; CHECK-LABEL: xar_nxv2i64_l_neg1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z3.d, z2.d
-; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0
+; CHECK-NEXT: movprfx z3, z2
+; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0
; CHECK-NEXT: eor z0.d, z0.d, z1.d
+; CHECK-NEXT: and z2.d, z2.d, #0x3f
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z3.d, z3.d, #0x3f
-; CHECK-NEXT: and z2.d, z2.d, #0x3f
-; CHECK-NEXT: lslr z3.d, p0/m, z3.d, z0.d
-; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: orr z0.d, z3.d, z0.d
+; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z3.d
+; CHECK-NEXT: orr z0.d, z2.d, z0.d
; CHECK-NEXT: ret
%a = xor <vscale x 2 x i64> %x, %y
%b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)