[llvm] [LLVM][SVE] Improve code generation for i1 based int_to_fp operations. (PR #129229)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 03:35:32 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
<details>
<summary>Changes</summary>
Rather than extending the predicate, we can use it directly to select between the two possible results.
---
Full diff: https://github.com/llvm/llvm-project/pull/129229.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+4-5)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+13)
- (modified) llvm/test/CodeGen/AArch64/sve-fcvt.ll (+32-48)
- (modified) llvm/test/CodeGen/AArch64/sve-split-fcvt.ll (+10-12)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7a471662ea075..110a592df2d2d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5034,11 +5034,10 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
- // We can't directly extend an SVE predicate; extend it first.
- unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- EVT CastVT = getPromotedVTForPredicate(InVT);
- In = DAG.getNode(CastOpc, dl, CastVT, In);
- return DAG.getNode(Opc, dl, VT, In);
+ SDValue FalseVal = DAG.getConstantFP(0.0, dl, VT);
+ SDValue TrueVal = IsSigned ? DAG.getConstantFP(-1.0, dl, VT)
+ : DAG.getConstantFP(1.0, dl, VT);
+ return DAG.getNode(ISD::VSELECT, dl, VT, In, TrueVal, FalseVal);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8255b267bd7e9..8d2e7f4a8ed10 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5452,6 +5452,19 @@ multiclass sve_int_dup_fpimm_pred<string asm> {
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>;
def : InstAlias<"fmov $Zd, $Pg/m, $imm8",
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>;
+
+ def : Pat<(nxv8f16 (vselect nxv8i1:$pg, (splat_vector fpimm16:$imm8), nxv8f16:$zd)),
+ (!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
+ def : Pat<(nxv4f16 (vselect nxv4i1:$pg, (splat_vector fpimm16:$imm8), nxv4f16:$zd)),
+ (!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
+ def : Pat<(nxv2f16 (vselect nxv2i1:$pg, (splat_vector fpimm16:$imm8), nxv2f16:$zd)),
+ (!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
+ def : Pat<(nxv4f32 (vselect nxv4i1:$pg, (splat_vector fpimm32:$imm8), nxv4f32:$zd)),
+ (!cast<Instruction>(NAME # _S) $zd, $pg, fpimm32:$imm8)>;
+ def : Pat<(nxv2f32 (vselect nxv2i1:$pg, (splat_vector fpimm32:$imm8), nxv2f32:$zd)),
+ (!cast<Instruction>(NAME # _S) $zd, $pg, fpimm32:$imm8)>;
+ def : Pat<(nxv2f64 (vselect nxv2i1:$pg, (splat_vector fpimm64:$imm8), nxv2f64:$zd)),
+ (!cast<Instruction>(NAME # _D) $zd, $pg, fpimm64:$imm8)>;
}
class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
index fc5128fffad36..a6749984af427 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -454,9 +454,8 @@ define <vscale x 2 x i64> @fcvtzu_d_nxv2f64(<vscale x 2 x double> %a) {
define <vscale x 2 x half> @scvtf_h_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x half>
ret <vscale x 2 x half> %res
@@ -495,9 +494,8 @@ define <vscale x 2 x half> @scvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
define <vscale x 3 x half> @scvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv3i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
@@ -516,9 +514,8 @@ define <vscale x 3 x half> @scvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
define <vscale x 4 x half> @scvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x half>
ret <vscale x 4 x half> %res
@@ -547,9 +544,8 @@ define <vscale x 4 x half> @scvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 7 x half> @scvtf_h_nxv7i1(<vscale x 7 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv7i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 7 x i1> %a to <vscale x 7 x half>
ret <vscale x 7 x half> %res
@@ -568,9 +564,8 @@ define <vscale x 7 x half> @scvtf_h_nxv7i16(<vscale x 7 x i16> %a) {
define <vscale x 8 x half> @scvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 8 x i1> %a to <vscale x 8 x half>
ret <vscale x 8 x half> %res
@@ -589,9 +584,8 @@ define <vscale x 8 x half> @scvtf_h_nxv8i16(<vscale x 8 x i16> %a) {
define <vscale x 2 x float> @scvtf_s_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x float>
ret <vscale x 2 x float> %res
@@ -620,9 +614,8 @@ define <vscale x 2 x float> @scvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
define <vscale x 3 x float> @scvtf_s_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv3i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x float>
ret <vscale x 3 x float> %res
@@ -641,9 +634,8 @@ define <vscale x 3 x float> @scvtf_s_nxv3i32(<vscale x 3 x i32> %a) {
define <vscale x 4 x float> @scvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x float>
ret <vscale x 4 x float> %res
@@ -662,9 +654,8 @@ define <vscale x 4 x float> @scvtf_s_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 2 x double> @scvtf_d_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: fmov z0.d, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x double>
ret <vscale x 2 x double> %res
@@ -695,9 +686,8 @@ define <vscale x 2 x double> @scvtf_d_nxv2i64(<vscale x 2 x i64> %a) {
define <vscale x 2 x half> @ucvtf_h_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x half>
ret <vscale x 2 x half> %res
@@ -736,9 +726,8 @@ define <vscale x 2 x half> @ucvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
define <vscale x 3 x half> @ucvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv3i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
ret <vscale x 3 x half> %res
@@ -767,9 +756,8 @@ define <vscale x 3 x half> @ucvtf_h_nxv3i32(<vscale x 3 x i32> %a) {
define <vscale x 4 x half> @ucvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x half>
ret <vscale x 4 x half> %res
@@ -798,9 +786,8 @@ define <vscale x 4 x half> @ucvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 8 x half> @ucvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 8 x i1> %a to <vscale x 8 x half>
ret <vscale x 8 x half> %res
@@ -819,9 +806,8 @@ define <vscale x 8 x half> @ucvtf_h_nxv8i16(<vscale x 8 x i16> %a) {
define <vscale x 2 x float> @ucvtf_s_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: ucvtf_s_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fmov z0.s, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x float>
ret <vscale x 2 x float> %res
@@ -850,9 +836,8 @@ define <vscale x 2 x float> @ucvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
define <vscale x 4 x float> @ucvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_s_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fmov z0.s, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x float>
ret <vscale x 4 x float> %res
@@ -871,9 +856,8 @@ define <vscale x 4 x float> @ucvtf_s_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 2 x double> @ucvtf_d_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: fmov z0.d, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x double>
ret <vscale x 2 x double> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
index bc015116917d8..5c84551432909 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -331,13 +331,12 @@ define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: punpklo p2.h, p0.b
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: mov z0.d, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: scvtf z0.d, p1/m, z0.d
-; CHECK-NEXT: scvtf z1.d, p1/m, z1.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
+; CHECK-NEXT: fmov z0.d, p1/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res
@@ -393,13 +392,12 @@ define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: punpklo p2.h, p0.b
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: mov z0.d, p2/z, #1 // =0x1
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
-; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d
-; CHECK-NEXT: ucvtf z1.d, p1/m, z1.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
+; CHECK-NEXT: fmov z0.d, p1/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res
``````````
</details>
https://github.com/llvm/llvm-project/pull/129229
More information about the llvm-commits
mailing list