[PATCH] D96700: [llvm][Aarch64][SVE] Remove extra fmov instruction with certain literals
David Truby via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 06:33:56 PST 2021
DavidTruby updated this revision to Diff 323738.
DavidTruby added a comment.
Remove faulty f64 patterns
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D96700/new/
https://reviews.llvm.org/D96700
Files:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
llvm/test/CodeGen/AArch64/sve-vector-splat.ll
Index: llvm/test/CodeGen/AArch64/sve-vector-splat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -372,5 +372,32 @@
ret <vscale x 4 x float> %r
}
+define <vscale x 2 x float> @splat_nxv2f32_fmov_fold() {
+; CHECK-LABEL: splat_nxv2f32_fmov_fold
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+ %1 = insertelement <vscale x 2 x float> undef, float 4.200000e+01, i32 0
+ %2 = shufflevector <vscale x 2 x float> %1, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x float> %2
+}
+
+define <vscale x 4 x float> @splat_nxv4f32_fmov_fold() {
+; CHECK-LABEL: splat_nxv4f32_fmov_fold
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+ %1 = insertelement <vscale x 4 x float> undef, float 4.200000e+01, i32 0
+ %2 = shufflevector <vscale x 4 x float> %1, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 2 x double> @splat_nxv2f64_fmov_fold() {
+; CHECK-LABEL: splat_nxv2f64_fmov_fold
+; CHECK: mov x8, #4631107791820423168
+; CHECK-NEXT: mov z0.d, x8
+ %1 = insertelement <vscale x 2 x double> undef, double 4.200000e+01, i32 0
+ %2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x double> %2
+}
+
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -130,12 +130,37 @@
ret <vscale x 2 x double> %out
}
+define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
+; CHECK-LABEL: dup_fmov_imm_f32_2:
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+ %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
+ ret <vscale x 2 x float> %out
+}
+
+define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
+; CHECK-LABEL: dup_fmov_imm_f32_4:
+; CHECK: mov w8, #1109917696
+; CHECK-NEXT: mov z0.s, w8
+ %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
+; CHECK-LABEL: dup_fmov_imm_f64_2:
+; CHECK: mov x8, #4631107791820423168
+; CHECK-NEXT: mov z0.d, x8
+ %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
+ ret <vscale x 2 x double> %out
+}
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8( i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
+declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -553,6 +553,14 @@
def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
(DUP_ZI_D $a, $b)>;
+ // Duplicate an arbitrary FP immediate into all vector elements via a GPR.
+ def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))),
+ (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
+ def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))),
+ (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>;
+ def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))),
+ (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>;
+
// Duplicate FP immediate into all vector elements
let AddedComplexity = 2 in {
def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D96700.323738.patch
Type: text/x-patch
Size: 4195 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210215/ad2e68bb/attachment.bin>
More information about the llvm-commits mailing list