[llvm] [AArch64][SVE] Add MOVPRFX hints for unary undef pseudos. (PR #173031)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 19 08:17:05 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
Extend the hints added in #166926 to unary pseudos with undef inactive lanes.
If there's a reason I'm missing for not setting FalseLanesUndef on unary pseudos, please let me know and I'll attempt to do this a different way.
---
Patch is 117.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173031.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+3)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+34-34)
- (modified) llvm/test/CodeGen/AArch64/sched-movprfx.ll (+2-1)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll (+6-8)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll (+38-41)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll (+156-163)
- (modified) llvm/test/CodeGen/AArch64/sve-llrint.ll (+189-198)
- (modified) llvm/test/CodeGen/AArch64/sve-lrint.ll (+189-198)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+7-10)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll (+28-32)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+34-43)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 34f492a35a451..ee43448d5baec 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1187,6 +1187,9 @@ bool AArch64RegisterInfo::getRegAllocationHints(
case AArch64::DestructiveBinaryImm:
AddHintIfSuitable(R, Def.getOperand(2));
break;
+ case AArch64::DestructiveUnaryPassthru:
+ AddHintIfSuitable(R, Def.getOperand(3));
+ break;
}
}
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9558cb5162721..fd177e1496282 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3165,7 +3165,7 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
- def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype), FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
}
@@ -3185,7 +3185,7 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
- def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
+ def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype), FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
}
@@ -3205,9 +3205,9 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H_UNDEF)>;
@@ -4235,7 +4235,7 @@ multiclass sve2_int_un_pred_arit_s<bits<2> opc, string asm,
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
}
@@ -4255,10 +4255,10 @@ multiclass sve2_int_un_pred_arit<bits<2> opc, string asm, SDPatternOperator op>
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
@@ -4957,10 +4957,10 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
@@ -4993,9 +4993,9 @@ multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _H_UNDEF)>;
defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _S_UNDEF)>;
@@ -5022,8 +5022,8 @@ multiclass sve_int_un_pred_arit_w<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _S_UNDEF)>;
defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _D_UNDEF)>;
@@ -5044,7 +5044,7 @@ multiclass sve_int_un_pred_arit_d<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
@@ -5071,10 +5071,10 @@ multiclass sve_int_un_pred_arit_bitwise<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
@@ -5113,9 +5113,9 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4bf16, op, nxv4i1, nxv4bf16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv2bf16, op, nxv2i1, nxv2bf16, !cast<Instruction>(NAME # _H)>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
@@ -5142,9 +5142,9 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternO
}
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
@@ -5155,10 +5155,10 @@ multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
}
multiclass sve_int_un_pred_arit_bhsd<SDPatternOperator op> {
- def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
diff --git a/llvm/test/CodeGen/AArch64/sched-movprfx.ll b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
index 9e88d1659d45f..cf1c1f45f700c 100644
--- a/llvm/test/CodeGen/AArch64/sched-movprfx.ll
+++ b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
@@ -14,13 +14,14 @@ define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: abs z0.d, p1/m, z2.d
+; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%data0 = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %c, i1 0)
%data1 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %base,
i32 1,
<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> undef)
+ <vscale x 2 x i64> %c)
%out = add <vscale x 2 x i64> %data0, %data1
ret <vscale x 2 x i64> %out
}
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
index 736239599836c..ace0422bba09a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -1385,10 +1385,10 @@ define void @abs_v128i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-NEXT: mov x11, #80 // =0x50
; CHECK-NEXT: mov x12, #32 // =0x20
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
-; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
; CHECK-NEXT: mov x13, #48 // =0x30
; CHECK-NEXT: mov x14, #16 // =0x10
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
; CHECK-NEXT: ld1h { z4.h }, p0/z, [x0, x12, lsl #1]
; CHECK-NEXT: ld1h { z5.h }, p0/z, [x0, x13, lsl #1]
@@ -1398,19 +1398,17 @@ define void @abs_v128i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-NEXT: abs z2.h, p0/m, z2.h
; CHECK-NEXT: abs z3.h, p0/m, z3.h
; CHECK-NEXT: abs z4.h, p0/m, z4.h
+; CHECK-NEXT: abs z5.h, p0/m, z5.h
+; CHECK-NEXT: abs z6.h, p0/m, z6.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
-; CHECK-NEXT: movprfx z1, z5
-; CHECK-NEXT: abs z1.h, p0/m, z5.h
; CHECK-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
-; CHECK-NEXT: movprfx z2, z6
-; CHECK-NEXT: abs z2.h, p0/m, z6.h
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z3.h }, p0, [x0, x11, lsl #1]
; CHECK-NEXT: st1h { z4.h }, p0, [x0, x12, lsl #1]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, x13, lsl #1]
-; CHECK-NEXT: st1h { z2.h }, p0, [x0, x14, lsl #1]
+; CHECK-NEXT: st1h { z5.h }, p0, [x0, x13, lsl #1]
+; CHECK-NEXT: st1h { z6.h }, p0, [x0, x14, lsl #1]
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <128 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index a8b2c30bec562..c95fa965cd4d2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -599,19 +599,18 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
; CHECK-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-NEXT: movprfx z1, z2
-; CHECK-NEXT: frintx z1.d, p0/m, z2.d
-; CHECK-NEXT: mov z4.d, z1.d[2]
+; CHECK-NEXT: frintx z2.d, p0/m, z2.d
+; CHECK-NEXT: mov z4.d, z2.d[2]
; CHECK-NEXT: mov z5.d, z0.d[2]
-; CHECK-NEXT: mov z2.d, z0.d[1]
-; CHECK-NEXT: mov z3.d, z1.d[3]
+; CHECK-NEXT: mov z1.d, z0.d[1]
+; CHECK-NEXT: mov z3.d, z2.d[3]
; CHECK-NEXT: mov z6.d, z0.d[3]
; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fcvtzs x10, d1
+; CHECK-NEXT: mov z0.d, z2.d[1]
+; CHECK-NEXT: fcvtzs x10, d2
; CHECK-NEXT: fcvtzs x11, d4
; CHECK-NEXT: fcvtzs x12, d5
-; CHECK-NEXT: fcvtzs x9, d2
+; CHECK-NEXT: fcvtzs x9, d1
; CHECK-NEXT: fcvtzs x13, d3
; CHECK-NEXT: fcvtzs x14, d6
; CHECK-NEXT: fcvtzs x15, d0
@@ -633,57 +632,55 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
; CHECK-LABEL: llrint_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d, vl2
-; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
; CHECK-NEXT: // kill: def $q4 killed $q4 def $z4
-; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
-; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q5 killed $q5 def $z5
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: // kill: def $q7 killed $q7 def $z7
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d
; CHECK-NEXT: splice z4.d, p1, z4.d, z5.d
; CHECK-NEXT: splice z2.d, p1, z2.d, z3.d
+; CHECK-NEXT: splice z6.d, p1, z6.d, z7.d
; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
-; CHECK-NEXT: movprfx z3, z6
-; CHECK-NEXT: frintx z3.d, p0/m, z6.d
-; CHECK-NEXT: movprfx z1, z4
-; CHECK-NEXT: frintx z1.d, p0/m, z4.d
+; CHECK-NEXT: frintx z4.d, p0/m, z4.d
; CHECK-NEXT: frintx z2.d, p0/m, z2.d
+; CHECK-NEXT: frintx z6.d, p0/m, z6.d
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
-; CHECK-NEXT: mov z4.d, z3.d[2]
-; CHECK-NEXT: mov z5.d, z1.d[2]
-; CHECK-NEXT: mov z6.d, z2.d[3]
+; CHECK-NEXT: mov z3.d, z4.d[2]
+; CHECK-NEXT: mov z5.d, z2.d[3]
+; CHECK-NEXT: mov z1.d, z6.d[2]
; CHECK-NEXT: fcvtzs x11, d0
-; CHECK-NEXT: fcvtzs x12, d1
+; CHECK-NEXT: fcvtzs x12, d4
; CHECK-NEXT: fcvtzs x13, d2
-; CHECK-NEXT: fcvtzs x14, d3
-; CHECK-NEXT: mov z7.d, z3.d[3]
-; CHECK-NEXT: mov z16.d, z1.d[3]
-; CHECK-NEXT: fcvtzs x9, d4
-; CHECK-NEXT: fcvtzs x10, d5
-; CHECK-NEXT: mov z4.d, z2.d[2]
+; CHECK-NEXT: fcvtzs x14, d6
+; CHECK-NEXT: mov z7.d, z6.d[3]
+; CHECK-NEXT: mov z16.d, z0.d[3]
+; CHECK-NEXT: fcvtzs x10, d3
+; CHECK-NEXT: mov z3.d, z2.d[2]
+; CHECK-NEXT: fcvtzs x8, d5
; CHECK-NEXT: mov z5.d, z0.d[2]
-; CHECK-NEXT: fcvtzs x8, d6
+; CHECK-NEXT: fcvtzs x9, d1
+; CHECK-NEXT: mov z1.d, z4.d[3]
; CHECK-NEXT: mov z2.d, z2.d[1]
-; CHECK-NEXT: mov z6.d, z0.d[3]
-; CHECK-NEXT: mov z1.d, z1.d[1]
-; CHECK-NEXT: mov z3.d, z3.d[1]
-; CHECK-NEXT: fcvtzs x15, d4
-; CHECK-NEXT: mov z4.d, z0.d[1]
+; CHECK-NEXT: mov z17.d, z6.d[1]
+; CHECK-NEXT: fcvtzs x17, d7
+; CHECK-NEXT: fcvtzs x15, d3
+; CHECK-NEXT: mov z3.d, z0.d[1]
; CHECK-NEXT: fmov d0, x11
; CHECK-NEXT: fcvtzs x16, d5
+; CHECK-NEXT: mov z5.d, z4.d[1]
+; CHECK-NEXT: fmov d4, x12
; CHECK-NEXT: fcvtzs x11, d2
; CHECK-NEXT: fmov d2, x13
-; CHECK-NEXT: fcvtzs x17, d7
-; CHECK-NEXT: fcvtzs x18, d16
-; CHECK-NEXT: fcvtzs x0, d3
-; CHECK-NEXT: fcvtzs x13, d4
-; CHECK-NEXT: fmov d4, x12
-; CHECK-NEXT: fcvtzs x12, d6
+; CHECK-NEXT: fcvtzs x12, d16
+; CHECK-NEXT: fcvtzs x13, d3
; CHECK-NEXT: fmov d6, x14
-; CHECK-NEXT: fcvtzs x14, d1
+; CHECK-NEXT: fcvtzs x18, d1
+; CHECK-NEXT: fcvtzs x14, d5
+; CHECK-NEXT: fcvtzs x0, d17
; CHECK-NEXT: fmov d3, x15
; CHECK-NEXT: fmov d1, x16
; CHECK-NEXT: fmov d5, x10
@@ -691,9 +688,9 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
; CHECK-NEXT: mov v2.d[1], x11
; CHECK-NEXT: mov v0.d[1], x13
; CHECK-NEXT: mov v3.d[1], x8
-; CHECK-NEXT: mov v6.d[1], x0
; CHECK-NEXT: mov v4.d[1], x14
; CHECK-NEXT: mov v1.d[1], x12
+; CHECK-NEXT: mov v6.d[1], x0
; CHECK-NEXT: mov v5.d[1], x18
; CHECK-NEXT: mov v7.d[1], x17
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 465ba38b17874..2b8e340a1dc0d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -635,54 +635,53 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
; CHECK-i32-NEXT: splice z2.d, p0, z2.d, z3.d
; CHECK-i32-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-i32-NEXT: ptrue p0.s, vl8
-; CHECK-i32-NEXT: movprfx z1, z2
-; CHECK-i32-NEXT: frintx z1.s, p0/m, z2.s
+; CHECK-i32-NEXT: frintx z2.s, p0/m, z2.s
; CHECK-i32-NEXT: frintx z0.s, p0/m, z0.s
-; CHECK-i32-NEXT: mov z2.s, z1.s[5]
-; CHECK-i32-NEXT: mov z3.s, z1.s[4]
+; CHECK-i32-NEXT: mov z1.s, z2.s[5]
+; CHECK-i32-NEXT: mov z3.s, z2.s[4]
; CHECK-i32-NEXT: mov z5.s, z0.s[5]
; CHECK-i32-NEXT: mov z7.s, z0.s[1]
; CHECK-i32-NEXT: fcvtzs w11, s0
-; CHECK-i32-NEXT: fcvtzs w13, s1
-; CHECK-i32-NEXT: mov z4.s, z1.s[7]
-; CHECK-i32-NEXT: mov z6.s, z1.s[6]
+; CHECK-i32-NEXT: fcvtzs w13, s2
+; CHECK-i32-NEXT: mov z4.s, z2.s[7]
+; CHECK-i32-NEXT: mov z6.s, z2.s[6]
; CHECK-i32-NEXT: mov z16.s, z0....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/173031
More information about the llvm-commits
mailing list