[llvm] b0bfbad - [AArch64] Always lower fp16 zero to FMOVH0
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 15 08:06:38 PST 2023
Author: David Green
Date: 2023-02-15T16:06:32Z
New Revision: b0bfbad19b0698c51a0b932f82f778e67f2d7e0c
URL: https://github.com/llvm/llvm-project/commit/b0bfbad19b0698c51a0b932f82f778e67f2d7e0c
DIFF: https://github.com/llvm/llvm-project/commit/b0bfbad19b0698c51a0b932f82f778e67f2d7e0c.diff
LOG: [AArch64] Always lower fp16 zero to FMOVH0
We can always use FMOVH0 to lower an fp16 zero, even without fullfp16. It can
be expanded to either movi d0, #0 or fmov s0, wzr, both of which clear all the
bits of the register.
Differential Revision: https://reviews.llvm.org/D143988
Added:
Modified:
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-fmax.ll
llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
llvm/test/CodeGen/AArch64/f16-imm.ll
llvm/test/CodeGen/AArch64/mattr-all.ll
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index e93d0c0c3d28..1c9a790c6d72 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1254,7 +1254,9 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case AArch64::FMOVH0:
- FMov.setOpcode(AArch64::FMOVWHr);
+ FMov.setOpcode(STI->hasFullFP16() ? AArch64::FMOVWHr : AArch64::FMOVWSr);
+ if (!STI->hasFullFP16())
+ DestReg = (AArch64::S0 + (DestReg - AArch64::H0));
FMov.addOperand(MCOperand::createReg(DestReg));
FMov.addOperand(MCOperand::createReg(AArch64::WZR));
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f3497048697..603c94a72497 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9772,14 +9772,16 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
- else if (VT == MVT::f16 && Subtarget->hasFullFP16())
- IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
- // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to
- // generate that fmov.
+ else if (VT == MVT::f16)
+ IsLegal =
+ (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
+ Imm.isPosZero();
// If we can not materialize in immediate field for fmov, check if the
// value can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
+ // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
+ // generate that fmov.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 505d9787ff70..762563089c28 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4204,7 +4204,7 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
- Sched<[WriteF]>, Requires<[HasFullFP16]>;
+ Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index 24429a8d275a..d7d54a6e48a9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -73,8 +73,7 @@ define float @test_f16(half %in) {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-NEXT: fcvt s1, h0
-; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT: movi d2, #0000000000000000
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcsel s0, s0, s2, lt
; CHECK-NEXT: fcvt s0, h0
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
index d0e45e5059ec..6c3cd4766d79 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -19,7 +19,7 @@ define void @t1() nounwind ssp {
entry:
; ALL-LABEL: t1:
; ALL-NOT: fmov
-; NONEFP-DAG: ldr h0,{{.*}}
+; NONEFP-DAG: fmov s0, wzr
; NONEFP-DAG: fmov s1, wzr
; NONEFP-DAG: fmov d2, xzr
; NONEFP-DAG: movi{{(.16b)?}} v3{{(.2d)?}}, #0
@@ -27,7 +27,7 @@ entry:
; NONE16: fmov s1, wzr
; NONE16: fmov d2, xzr
; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0
-; ZEROFP-DAG: ldr h0,{{.*}}
+; ZEROFP-DAG: movi d0, #0
; ZEROFP-DAG: movi d1, #0
; ZEROFP-DAG: movi d2, #0
; ZEROFP-DAG: movi v3.2d, #0
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index 7f31465c18b1..6fa994bb1d42 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -16,8 +16,7 @@ define half @Const0() {
;
; CHECK-NOFP16-LABEL: Const0:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI0_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI0_0]
+; CHECK-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-NOFP16-NEXT: ret
entry:
ret half 0xH0000
diff --git a/llvm/test/CodeGen/AArch64/mattr-all.ll b/llvm/test/CodeGen/AArch64/mattr-all.ll
index 78a905173678..1da37616c0cb 100644
--- a/llvm/test/CodeGen/AArch64/mattr-all.ll
+++ b/llvm/test/CodeGen/AArch64/mattr-all.ll
@@ -6,8 +6,7 @@
define half @bf16() nounwind {
; CHECK-LABEL: bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ret
ret half 0xH0000
}
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index ba44bc99ce8c..c2c436ca41a6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -294,9 +294,8 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECKNOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECKNOFP16: // %bb.0: // %entry
-; CHECKNOFP16-NEXT: adrp x9, .LCPI10_0
+; CHECKNOFP16-NEXT: movi d0, #0000000000000000
; CHECKNOFP16-NEXT: mov x8, xzr
-; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI10_0]
; CHECKNOFP16-NEXT: .LBB10_1: // %loop
; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKNOFP16-NEXT: ldr d1, [x0, x8]
@@ -365,9 +364,8 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECKNOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECKNOFP16: // %bb.0: // %entry
-; CHECKNOFP16-NEXT: adrp x9, .LCPI11_0
+; CHECKNOFP16-NEXT: movi d0, #0000000000000000
; CHECKNOFP16-NEXT: mov x8, xzr
-; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI11_0]
; CHECKNOFP16-NEXT: .LBB11_1: // %loop
; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKNOFP16-NEXT: ldr q1, [x0, x8]
More information about the llvm-commits
mailing list