[llvm] d5f1131 - [AArch64] Default to zero-cycle-zeroing FP registers
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 6 01:48:00 PDT 2021
Author: Sjoerd Meijer
Date: 2021-04-06T09:47:50+01:00
New Revision: d5f1131c812df57560c7563475cb0d674a101636
URL: https://github.com/llvm/llvm-project/commit/d5f1131c812df57560c7563475cb0d674a101636
DIFF: https://github.com/llvm/llvm-project/commit/d5f1131c812df57560c7563475cb0d674a101636.diff
LOG: [AArch64] Default to zero-cycle-zeroing FP registers
It is generally beneficial to prefer "movi d0, #0" over "fmov s0, wzr" as this
is most efficient across all cores; it is recognised as a zeroing idiom. For
newer cores, fmov instructions can also be eliminated early and there is no
difference with movi, but some implementations lack this, so it is not true for
other/older cores. Thus this standardises on using movi, as this should always
give the same or better performance than the fmov with wzr.
Differential Revision: https://reviews.llvm.org/D99586
Added:
Modified:
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
llvm/test/CodeGen/AArch64/arm64-rev.ll
llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
llvm/test/CodeGen/AArch64/f16-imm.ll
llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
llvm/test/CodeGen/AArch64/remat-float0.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index bdf2e517deda..133a6b16e979 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -147,12 +147,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
"Has zero-cycle zeroing instructions for generic registers">;
-def FeatureZCZeroingFP : SubtargetFeature<"zcz-fp", "HasZeroCycleZeroingFP", "true",
- "Has zero-cycle zeroing instructions for FP registers">;
+def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
+ "Has no zero-cycle zeroing instructions for FP registers">;
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions",
- [FeatureZCZeroingGP, FeatureZCZeroingFP]>;
+ [FeatureZCZeroingGP]>;
/// ... but the floating-point version doesn't quite work in rare cases on older
/// CPUs.
@@ -915,8 +915,7 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureLSLFast,
FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureZCZeroingFP]>;
+ FeaturePredictableSelectIsExpensive]>;
def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M4 processors",
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 6447103128a5..ce5a0128e622 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -196,9 +196,14 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
bool HasZeroCycleZeroingGP = false;
- bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
+ // It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
+ // as movi is more efficient across all cores. Newer cores can eliminate
+ // fmovs early and there is no difference with movi, but this not true for
+ // all implementations.
+ bool HasZeroCycleZeroingFP = true;
+
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
index 8703b2e21ce8..a09aae2962cf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
@@ -15,7 +15,7 @@ define float @fmov_float2() {
; CHECK-LABEL: fmov_float2
; CHECK: fmov s0, wzr
; GISEL-LABEL: fmov_float2
-; GISEL: fmov s0, wzr
+; GISEL: movi d0, #0000000000000000
ret float 0.0e+00
}
@@ -31,7 +31,7 @@ define double @fmov_double2() {
; CHECK-LABEL: fmov_double2
; CHECK: fmov d0, xzr
; GISEL-LABEL: fmov_double2
-; GISEL: fmov d0, xzr
+; GISEL: movi d0, #0000000000000000
ret double 0.0e+00
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
index 70548cad205f..9a753748a29e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-contract-zero.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64 -fp-contract=fast -o - %s | FileCheck %s
@@ -5,8 +6,11 @@
; -0.0. It's also good, though not essential, that we don't resort to a litpool.
define double @test_fms_fold(double %a, double %b) {
; CHECK-LABEL: test_fms_fold:
-; CHECK: fmov {{d[0-9]+}}, xzr
-; CHECK: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d2, #0000000000000000
+; CHECK-NEXT: fmul d1, d1, d2
+; CHECK-NEXT: fnmsub d0, d0, d2, d1
+; CHECK-NEXT: ret
%mul = fmul double %a, 0.000000e+00
%mul1 = fmul double %b, 0.000000e+00
%sub = fsub double %mul, %mul1
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 5f76f0a1c271..cee47d733cb8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -561,7 +561,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
;
; FALLBACK-LABEL: float_vrev64:
; FALLBACK: // %bb.0: // %entry
-; FALLBACK-NEXT: fmov s0, wzr
+; FALLBACK-NEXT: movi d0, #0000000000000000
; FALLBACK-NEXT: mov.s v0[1], v0[0]
; FALLBACK-NEXT: mov.s v0[2], v0[0]
; FALLBACK-NEXT: adrp x8, .LCPI28_0
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index b0d9db3f7eca..de3271794a8b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=-zcz | FileCheck %s -check-prefixes=ALL,NONEGP,NONEFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=-zcz-gp,+no-zcz-fp | FileCheck %s -check-prefixes=ALL,NONEGP,NONEFP
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZEROGP,ZERO16
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gp | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-fp | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+zcz-gp,+no-zcz-fp | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP
; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=ALL,ZEROGP,NONEFP
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=apple-a10 | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 | FileCheck %s -check-prefixes=ALL,ZEROGP,NONE16
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=exynos-m3 | FileCheck %s -check-prefixes=ALL,NONEGP,ZEROFP
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=kryo | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mcpu=falkor | FileCheck %s -check-prefixes=ALL,ZEROGP,ZEROFP
declare void @bar(half, float, double, <2 x double>)
declare void @bari(i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index 42c49f7cc7ac..b49262e6e946 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ
+; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+no-zcz-fp | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index 7f57d5b771ed..8a2171939803 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -20,7 +20,7 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
; CHECK-LABEL: test_signed_i1_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #-1.00000000
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmaxnm s1, s0, s1
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzs w8, s1
@@ -243,7 +243,7 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
; CHECK-LABEL: test_signed_i1_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d1, #-1.00000000
-; CHECK-NEXT: fmov d2, xzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmaxnm d1, d0, d1
; CHECK-NEXT: fminnm d1, d1, d2
; CHECK-NEXT: fcvtzs w8, d1
@@ -462,7 +462,7 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmov s1, #-1.00000000
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fmaxnm s1, s0, s1
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzs w8, s1
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index d0a9c4ddd67f..10d8c0be6b19 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1469,7 +1469,7 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fmov s2, #-1.00000000
-; CHECK-NEXT: fmov s3, wzr
+; CHECK-NEXT: movi d3, #0000000000000000
; CHECK-NEXT: fmaxnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s1
; CHECK-NEXT: fmaxnm s1, s0, s2
@@ -1849,7 +1849,7 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fmov d2, #-1.00000000
-; CHECK-NEXT: fmov d3, xzr
+; CHECK-NEXT: movi d3, #0000000000000000
; CHECK-NEXT: fmaxnm d4, d1, d2
; CHECK-NEXT: fcmp d1, d1
; CHECK-NEXT: fmaxnm d1, d0, d2
@@ -2212,7 +2212,7 @@ define <4 x i1> @test_signed_v4f16_v4i1(<4 x half> %f) {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s2, #-1.00000000
; CHECK-NEXT: fcvt s4, h0
-; CHECK-NEXT: fmov s3, wzr
+; CHECK-NEXT: movi d3, #0000000000000000
; CHECK-NEXT: fmaxnm s5, s4, s2
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fminnm s5, s5, s3
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index ef29e7890357..6a192107ff4b 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -19,7 +19,7 @@ declare i128 @llvm.fptoui.sat.i128.f32(float)
define i1 @test_unsigned_i1_f32(float %f) nounwind {
; CHECK-LABEL: test_unsigned_i1_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fminnm s0, s0, s1
@@ -33,7 +33,7 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind {
define i8 @test_unsigned_i8_f32(float %f) nounwind {
; CHECK-LABEL: test_unsigned_i8_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -48,7 +48,7 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind {
; CHECK-LABEL: test_unsigned_i13_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #63488
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -63,7 +63,7 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind {
; CHECK-LABEL: test_unsigned_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65280
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -78,7 +78,7 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind {
; CHECK-LABEL: test_unsigned_i19_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65504
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -198,7 +198,7 @@ declare i128 @llvm.fptoui.sat.i128.f64(double)
define i1 @test_unsigned_i1_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i1_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, #1.00000000
; CHECK-NEXT: fminnm d0, d0, d1
@@ -213,7 +213,7 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #246290604621824
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16495, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -228,7 +228,7 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i13_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #280375465082880
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16575, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -243,7 +243,7 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281337537757184
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -258,7 +258,7 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i19_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281457796841472
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16671, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -273,7 +273,7 @@ define i32 @test_unsigned_i32_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i32_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -288,7 +288,7 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind {
; CHECK-LABEL: test_unsigned_i50_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-8
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #17167, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -378,7 +378,7 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; CHECK-LABEL: test_unsigned_i1_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fminnm s0, s0, s1
@@ -393,7 +393,7 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind {
; CHECK-LABEL: test_unsigned_i8_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -409,7 +409,7 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #63488
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -425,7 +425,7 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65280
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
@@ -441,7 +441,7 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65504
; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s1, w8
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 89233dedb054..1f259ff1cd6f 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -345,7 +345,7 @@ define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
; CHECK-LABEL: test_unsigned_v1f64_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
@@ -361,7 +361,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -383,7 +383,7 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
; CHECK-LABEL: test_unsigned_v3f64_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d3, xzr
+; CHECK-NEXT: movi d3, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d3
; CHECK-NEXT: fmov d4, x8
@@ -411,7 +411,7 @@ define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
; CHECK-LABEL: test_unsigned_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d2, xzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d3, v0.d[1]
; CHECK-NEXT: mov d4, v1.d[1]
@@ -441,7 +441,7 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-LABEL: test_unsigned_v5f64_v5i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d5, xzr
+; CHECK-NEXT: movi d5, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d5
; CHECK-NEXT: fmov d6, x8
@@ -468,7 +468,7 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-LABEL: test_unsigned_v6f64_v6i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d6, xzr
+; CHECK-NEXT: movi d6, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d6
; CHECK-NEXT: fmov d7, x8
@@ -1132,7 +1132,7 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmov s2, #1.00000000
; CHECK-NEXT: mov s3, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
@@ -1153,7 +1153,7 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
@@ -1176,7 +1176,7 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #63488
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
@@ -1199,7 +1199,7 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65280
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
@@ -1222,7 +1222,7 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65504
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: fmov s1, wzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
@@ -1433,7 +1433,7 @@ declare <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double>)
define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmov d2, #1.00000000
; CHECK-NEXT: mov d3, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1454,7 +1454,7 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #246290604621824
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16495, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1476,7 +1476,7 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i13:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #280375465082880
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16575, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1498,7 +1498,7 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281337537757184
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1520,7 +1520,7 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i19:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281457796841472
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16671, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1542,7 +1542,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1564,7 +1564,7 @@ define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i50:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-8
-; CHECK-NEXT: fmov d1, xzr
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #17167, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
@@ -1726,7 +1726,7 @@ define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
@@ -1761,7 +1761,7 @@ define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
@@ -1798,7 +1798,7 @@ define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
@@ -1835,7 +1835,7 @@ define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
@@ -1872,7 +1872,7 @@ define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) {
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fmov s2, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
diff --git a/llvm/test/CodeGen/AArch64/remat-float0.ll b/llvm/test/CodeGen/AArch64/remat-float0.ll
index 29af7818cf38..0b5d28b196fe 100644
--- a/llvm/test/CodeGen/AArch64/remat-float0.ll
+++ b/llvm/test/CodeGen/AArch64/remat-float0.ll
@@ -1,15 +1,15 @@
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
-; Check that float 0 gets rematerialized with an fmov of zero reg instead
+; Check that float 0 gets rematerialized with an "movi zero" instead
; of spilled/filled.
declare void @bar(float)
define void @foo() {
; CHECK-LABEL: foo:
-; CHECK: fmov s0, wzr
+; CHECK: movi d0, #0000000000000000
; CHECK: bl bar
-; CHECK: fmov s0, wzr
+; CHECK: movi d0, #0000000000000000
; CHECK: bl bar
call void @bar(float 0.000000e+00)
call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
More information about the llvm-commits
mailing list