[llvm] [LLVM][CodeGen][SVE] Use DUPM for constantfp splats. (PR #168391)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 07:54:58 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
Changes:
This helps cases where the immediate range of FDUP is not sufficient.
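For example (a hypothetical reduction in the style of the `sve-vector-splat.ll` tests below), the bit pattern of `half` -0.0 is 0x8000, which FDUP's 8-bit floating-point immediate cannot encode, but replicated across 64 bits it forms the valid logical immediate 0x8000800080008000, so the splat can now be materialised with a single DUPM:

```llvm
; Hypothetical reduction in the style of the sve-vector-splat.ll tests below.
; Compile with: llc -mtriple=aarch64 -mattr=+sve
define <vscale x 8 x half> @splat_negzero_f16() {
  ; Before this patch: mov w8, #32768
  ;                    mov z0.h, w8
  ; After this patch:  dupm z0.h, #0x8000
  ret <vscale x 8 x half> splat (half 0xH8000) ; 0xH8000 == -0.0
}
```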
---
Patch is 69.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168391.diff
11 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+37-34)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+1-1)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+25)
- (modified) llvm/test/CodeGen/AArch64/sve-bf16-combines.ll (+2-6)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-combine.ll (+6-9)
- (modified) llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll (+6-9)
- (modified) llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll (+57-64)
- (modified) llvm/test/CodeGen/AArch64/sve-llrint.ll (+98-104)
- (modified) llvm/test/CodeGen/AArch64/sve-lrint.ll (+98-104)
- (modified) llvm/test/CodeGen/AArch64/sve-vector-splat.ll (+290-2)
- (modified) llvm/test/CodeGen/AArch64/sve-vselect-imm.ll (+6-12)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index f1db05dda4e40..08466667c0fa5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4403,43 +4403,46 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
bool Invert) {
- if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
- uint64_t ImmVal = CNode->getZExtValue();
- SDLoc DL(N);
-
- if (Invert)
- ImmVal = ~ImmVal;
+ uint64_t ImmVal;
+ if (auto CI = dyn_cast<ConstantSDNode>(N))
+ ImmVal = CI->getZExtValue();
+ else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
+ ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ else
+ return false;
- // Shift mask depending on type size.
- switch (VT.SimpleTy) {
- case MVT::i8:
- ImmVal &= 0xFF;
- ImmVal |= ImmVal << 8;
- ImmVal |= ImmVal << 16;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i16:
- ImmVal &= 0xFFFF;
- ImmVal |= ImmVal << 16;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i32:
- ImmVal &= 0xFFFFFFFF;
- ImmVal |= ImmVal << 32;
- break;
- case MVT::i64:
- break;
- default:
- llvm_unreachable("Unexpected type");
- }
+ if (Invert)
+ ImmVal = ~ImmVal;
- uint64_t encoding;
- if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
- Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
- return true;
- }
+ // Shift mask depending on type size.
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ImmVal &= 0xFF;
+ ImmVal |= ImmVal << 8;
+ ImmVal |= ImmVal << 16;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i16:
+ ImmVal &= 0xFFFF;
+ ImmVal |= ImmVal << 16;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i32:
+ ImmVal &= 0xFFFFFFFF;
+ ImmVal |= ImmVal << 32;
+ break;
+ case MVT::i64:
+ break;
+ default:
+ llvm_unreachable("Unexpected type");
}
- return false;
+
+ uint64_t encoding;
+ if (!AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
+ return true;
}
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8c21c4822ffe..e99b3f8ff07e0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -989,7 +989,7 @@ let Predicates = [HasSVE_or_SME] in {
(DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
// Duplicate FP immediate into all vector elements
- let AddedComplexity = 2 in {
+ let AddedComplexity = 3 in {
def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)),
(FDUP_ZI_H fpimm16:$imm8)>;
def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1664f4ad0c8fa..1e771e1fb9403 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -347,6 +347,11 @@ def SVELogicalImm16Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16>",
def SVELogicalImm32Pat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVELogicalFPImm16Pat : ComplexPattern<f16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+def SVELogicalFPImm32Pat : ComplexPattern<f32, 1, "SelectSVELogicalImm<MVT::i32>", []>;
+def SVELogicalFPImm64Pat : ComplexPattern<f64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
+def SVELogicalBFPImmPat : ComplexPattern<bf16, 1, "SelectSVELogicalImm<MVT::i16>", []>;
+
def SVELogicalImm8NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
def SVELogicalImm16NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
def SVELogicalImm32NotPat : ComplexPattern<i32, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
@@ -2160,6 +2165,26 @@ multiclass sve_int_dup_mask_imm<string asm> {
(!cast<Instruction>(NAME) i64:$imm)>;
def : Pat<(nxv2i64 (splat_vector (i64 (SVELogicalImm64Pat i64:$imm)))),
(!cast<Instruction>(NAME) i64:$imm)>;
+
+ def : Pat<(nxv8f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv4f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv2f16 (splat_vector (f16 (SVELogicalFPImm16Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv4f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv2f32 (splat_vector (f32 (SVELogicalFPImm32Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv2f64 (splat_vector (f64 (SVELogicalFPImm64Pat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+
+ def : Pat<(nxv8bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv4bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
+ def : Pat<(nxv2bf16 (splat_vector (bf16 (SVELogicalBFPImmPat i64:$imm)))),
+ (!cast<Instruction>(NAME) i64:$imm)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
index 16e8feb0dc5bb..fc3e018f2ec7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-combines.ll
@@ -632,7 +632,6 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
; SVE: // %bb.0:
; SVE-NEXT: uunpkhi z3.s, z2.h
; SVE-NEXT: uunpkhi z4.s, z1.h
-; SVE-NEXT: mov w8, #32768 // =0x8000
; SVE-NEXT: uunpklo z2.s, z2.h
; SVE-NEXT: uunpklo z1.s, z1.h
; SVE-NEXT: ptrue p1.s
@@ -643,9 +642,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
; SVE-NEXT: fmul z3.s, z4.s, z3.s
; SVE-NEXT: fmul z1.s, z1.s, z2.s
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
-; SVE-NEXT: fmov h3, w8
+; SVE-NEXT: dupm z3.h, #0x8000
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
-; SVE-NEXT: mov z3.h, h3
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
; SVE-NEXT: uunpkhi z3.s, z0.h
@@ -665,10 +663,8 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
;
; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
; SVE-B16B16: // %bb.0:
-; SVE-B16B16-NEXT: mov w8, #32768 // =0x8000
+; SVE-B16B16-NEXT: dupm z3.h, #0x8000
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
-; SVE-B16B16-NEXT: fmov h3, w8
-; SVE-B16B16-NEXT: mov z3.h, h3
; SVE-B16B16-NEXT: sel z1.h, p0, z1.h, z3.h
; SVE-B16B16-NEXT: bfsub z0.h, z0.h, z1.h
; SVE-B16B16-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
index 53aba04028d62..57389ad2fe9b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll
@@ -1134,10 +1134,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d_negzero(<vscale x 2 x double> %a,
define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_h_negzero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32768 // =0x8000
+; CHECK-NEXT: dupm z3.h, #0x8000
; CHECK-NEXT: fmul z1.h, z1.h, z2.h
-; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h
+; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%fmul = fmul <vscale x 8 x half> %b, %c
@@ -1150,10 +1149,9 @@ define <vscale x 8 x half> @fsub_sel_fmul_h_negzero(<vscale x 8 x half> %a, <vsc
define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_s_negzero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT: mov z3.s, #0x80000000
; CHECK-NEXT: fmul z1.s, z1.s, z2.s
-; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s
+; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%fmul = fmul <vscale x 4 x float> %b, %c
@@ -1166,10 +1164,9 @@ define <vscale x 4 x float> @fsub_sel_fmul_s_negzero(<vscale x 4 x float> %a, <v
define <vscale x 2 x double> @fsub_sel_fmul_d_negzero(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: fsub_sel_fmul_d_negzero:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT: mov z3.d, #0x8000000000000000
; CHECK-NEXT: fmul z1.d, z1.d, z2.d
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d
+; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%fmul = fmul <vscale x 2 x double> %b, %c
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 8750867c56731..1223ae1c0cbdd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -51,10 +51,9 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: mov w8, #32768 // =0x8000
+; CHECK-NEXT: dupm z2.h, #0x8000
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str z0, [sp]
-; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: st1h { z2.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: ptrue p0.h
@@ -77,12 +76,11 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT: mov w8, #32768 // =0x8000
; CHECK-NEXT: str z1, [sp]
+; CHECK-NEXT: addvl x8, sp, #1
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: fadda h2, p0, h2, z0.h
-; CHECK-NEXT: mov z0.h, w8
-; CHECK-NEXT: addvl x8, sp, #1
+; CHECK-NEXT: dupm z0.h, #0x8000
; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: ldr z1, [sp]
; CHECK-NEXT: str z1, [sp, #1, mul vl]
@@ -105,11 +103,10 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT: mov w8, #32768 // =0x8000
+; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: fadda h2, p0, h2, z0.h
-; CHECK-NEXT: uunpklo z0.s, z1.h
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT: dupm z0.h, #0x8000
+; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
; CHECK-NEXT: fadda h2, p0, h2, z0.h
; CHECK-NEXT: fmov s0, s2
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 4ae7ac7b292e9..897ade00320db 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -454,18 +454,17 @@ declare <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f16.nxv4i64(<vscale x 4 x half>)
define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64511 // =0xfbff
+; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #31743 // =0x7bff
-; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: mov z2.d, #0xffffffff80000000
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.d, #0xffffffff80000000
-; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z1.d, #0x7fffffff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f16.nxv2i32(<vscale x 2 x half> %f)
@@ -475,18 +474,17 @@ define <vscale x 2 x i32> @test_signed_v2f16_v2i32(<vscale x 2 x half> %f) {
define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64511 // =0xfbff
+; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #31743 // =0x7bff
-; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: mov z2.s, #0x80000000
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.s, #0x80000000
-; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT: mov z2.s, #0x7fffffff
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z1.s, #0x7fffffff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h
-; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s
+; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f16.nxv4i32(<vscale x 4 x half> %f)
@@ -496,26 +494,25 @@ define <vscale x 4 x i32> @test_signed_v4f16_v4i32(<vscale x 4 x half> %f) {
define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64511 // =0xfbff
-; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: uunpkhi z0.s, z0.h
-; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z3.s, #0x80000000
; CHECK-NEXT: mov z4.s, #0x80000000
; CHECK-NEXT: mov z5.h, w8
-; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT: mov z2.s, #0x7fffffff
+; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z1.s, #0x7fffffff
; CHECK-NEXT: fcmgt p3.h, p0/z, z0.h, z5.h
-; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.h
-; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z5.h
+; CHECK-NEXT: fcvtzs z3.s, p1/m, z2.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z5.h
; CHECK-NEXT: fcvtzs z4.s, p2/m, z0.h
-; CHECK-NEXT: fcmuo p2.h, p0/z, z1.h, z1.h
+; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z2.h
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: sel z0.s, p1, z2.s, z3.s
-; CHECK-NEXT: sel z1.s, p3, z2.s, z4.s
+; CHECK-NEXT: sel z0.s, p1, z1.s, z3.s
+; CHECK-NEXT: sel z1.s, p3, z1.s, z4.s
; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
@@ -526,18 +523,17 @@ define <vscale x 8 x i32> @test_signed_v8f16_v8i32(<vscale x 8 x half> %f) {
define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #63488 // =0xf800
+; CHECK-NEXT: dupm z1.h, #0xf800
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #30719 // =0x77ff
+; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.h
+; CHECK-NEXT: mov z1.s, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcvtzs z1.s, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.s, #32767 // =0x7fff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s
+; CHECK-NEXT: sel z0.s, p1, z2.s, z1.s
; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f16.nxv4i16(<vscale x 4 x half> %f)
@@ -547,18 +543,17 @@ define <vscale x 4 x i16> @test_signed_v4f16_v4i16(<vscale x 4 x half> %f) {
define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
; CHECK-LABEL: test_signed_v8f16_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #63488 // =0xf800
+; CHECK-NEXT: dupm z1.h, #0xf800
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z2.h, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #30719 // =0x77ff
+; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.h, #32767 // =0x7fff
-; CHECK-NEXT: fcvtzs z2.h, p1/m, z0.h
+; CHECK-NEXT: mov z1.h, #-32768 // =0xffffffffffff8000
+; CHECK-NEXT: fcvtzs z1.h, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: sel z0.h, p2, z1.h, z2.h
+; CHECK-NEXT: sel z0.h, p1, z2.h, z1.h
; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f16.nxv8i16(<vscale x 8 x half> %f)
@@ -568,18 +563,17 @@ define <vscale x 8 x i16> @test_signed_v8f16_v8i16(<vscale x 8 x half> %f) {
define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
; CHECK-LABEL: test_signed_v2f16_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64511 // =0xfbff
+; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: mov w8, #31743 // =0x7bff
-; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: mov z2.d, #0x8000000000000000
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z1.d, #0x8000000000000000
-; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h
-; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: fcvtzs z2.d, p1/m, z0.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: mov z1.d, #0x7fffffffffffffff
; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h
-; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.h
-; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d
+; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
; CHECK-NEXT: ret
%x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f16.nxv2i64(<vscale x 2 x half> %f)
@@ -589,26 +583,25 @@ define <vscale x 2 x i64> @test_signed_v2f16_v2i64(<vscale x 2 x half> %f) {
define <vscale x 4 x i64> @test_signed_v4f16_v4i64(<vscale x 4 x half> %f) {
; CHECK-LABEL: test_signed_v4f16_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64511 // =0xfbff
-; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: mov z1.h, #-1025 // =0xfffffffffffffbff
+; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov w8, #31743 // =0x7bff
; CHECK-NEXT: mov z3.d, #0x8000000000000000
; CHECK-NEXT: mov z4.d, #0x8000000000000000
; CHECK-NEXT: mov z5.h, w8
-; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h
-; C...
[truncated]
``````````
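The same mechanism covers the wider element types; a hedged sketch (hypothetical functions mirroring the `sve-fp-combine.ll` changes above) of -0.0 splats that now select DUPM's MOV alias instead of a GPR round-trip:

```llvm
; Hypothetical splats mirroring the sve-fp-combine.ll changes above.
define <vscale x 4 x float> @splat_negzero_f32() {
  ; Now selected as: mov z0.s, #0x80000000 (an alias of dupm)
  ret <vscale x 4 x float> splat (float -0.0)
}

define <vscale x 2 x double> @splat_negzero_f64() {
  ; Now selected as: mov z0.d, #0x8000000000000000 (an alias of dupm)
  ret <vscale x 2 x double> splat (double -0.0)
}
```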
https://github.com/llvm/llvm-project/pull/168391