[llvm] f4c5cad - [ARM] Select f32 constants with vmov.f16
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 21 03:11:01 PDT 2020
Author: David Green
Date: 2020-09-21T11:10:47+01:00
New Revision: f4c5cadbcbb41f13cff0905449cfff4aef6a083c
URL: https://github.com/llvm/llvm-project/commit/f4c5cadbcbb41f13cff0905449cfff4aef6a083c
DIFF: https://github.com/llvm/llvm-project/commit/f4c5cadbcbb41f13cff0905449cfff4aef6a083c.diff
LOG: [ARM] Select f32 constants with vmov.f16
This adds lowering for f32 constants using vmov.f16, which zeroes the
top 16 bits whilst setting the lower 16 bits to the f16 immediate
pattern. This range of values does not often come up, except where an
f16 constant value has been converted to an f32.
Differential Revision: https://reviews.llvm.org/D87790
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrVFP.td
llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
llvm/test/CodeGen/ARM/fp16-bitcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 0235d6aacfda..70e8a797f869 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18067,6 +18067,9 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
if (VT == MVT::f16 && Subtarget->hasFullFP16())
return ARM_AM::getFP16Imm(Imm) != -1;
+ if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
+ ARM_AM::getFP32FP16Imm(Imm) != -1)
+ return true;
if (VT == MVT::f32)
return ARM_AM::getFP32Imm(Imm) != -1;
if (VT == MVT::f64 && Subtarget->hasFP64())
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index cf4bcc743d8f..2336bbfe547e 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -54,6 +54,16 @@ def vfp_f16imm : Operand<f16>,
let ParserMatchClass = FPImmOperand;
}
+def vfp_f32f16imm_xform : SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = ARM_AM::getFP32FP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>;
+
+def vfp_f32f16imm : PatLeaf<(f32 fpimm), [{
+ return ARM_AM::getFP32FP16Imm(N->getValueAPF()) != -1;
+ }], vfp_f32f16imm_xform>;
+
def vfp_f32imm_xform : SDNodeXForm<fpimm, [{
APFloat InVal = N->getValueAPF();
uint32_t enc = ARM_AM::getFP32Imm(InVal);
@@ -2637,6 +2647,11 @@ def FCONSTH : VFPAI<(outs HPR:$Sd), (ins vfp_f16imm:$imm),
}
}
+def : Pat<(f32 (vfp_f32f16imm:$imm)),
+ (f32 (COPY_TO_REGCLASS (f16 (FCONSTH (vfp_f32f16imm_xform (f32 $imm)))), SPR))> {
+ let Predicates = [HasFullFP16];
+}
+
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index dca252a177f2..8459b4ff2a14 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -687,6 +687,18 @@ namespace ARM_AM {
return getFP16Imm(FPImm.bitcastToAPInt());
}
+ /// If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding
+ /// for it. Otherwise return -1 like getFP16Imm.
+ inline int getFP32FP16Imm(const APInt &Imm) {
+ if (Imm.getActiveBits() > 16)
+ return -1;
+ return ARM_AM::getFP16Imm(Imm.trunc(16));
+ }
+
+ inline int getFP32FP16Imm(const APFloat &FPImm) {
+ return getFP32FP16Imm(FPImm.bitcastToAPInt());
+ }
+
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
index d1272f88ede1..553453296963 100644
--- a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
@@ -874,7 +874,7 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-MVE-NEXT: push {r7, lr}
; CHECK-MVE-NEXT: sub sp, #4
-; CHECK-MVE-NEXT: vldr s0, .LCPI11_0
+; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: blx r0
; CHECK-MVE-NEXT: vmov.f16 r0, s0
; CHECK-MVE-NEXT: vmov s0, r0
@@ -884,10 +884,6 @@ define half @h1(half (half)* nocapture %hptr) "cmse_nonsecure_entry" nounwind {
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
; CHECK-MVE-NEXT: bxns lr
-; CHECK-MVE-NEXT: .p2align 2
-; CHECK-MVE-NEXT: @ %bb.1:
-; CHECK-MVE-NEXT: .LCPI11_0:
-; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
%call = call half %hptr(half 10.0) nounwind
ret half %call
}
@@ -931,25 +927,41 @@ define half @h2(half (half)* nocapture %hptr) nounwind {
; CHECK-8M-NEXT: .LCPI12_0:
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
-; CHECK-81M-LABEL: h2:
-; CHECK-81M: @ %bb.0: @ %entry
-; CHECK-81M-NEXT: push {r7, lr}
-; CHECK-81M-NEXT: vldr s0, .LCPI12_0
-; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
-; CHECK-81M-NEXT: bic r0, r0, #1
-; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
-; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
-; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
-; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
-; CHECK-81M-NEXT: blxns r0
-; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
-; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
-; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
-; CHECK-81M-NEXT: pop {r7, pc}
-; CHECK-81M-NEXT: .p2align 2
-; CHECK-81M-NEXT: @ %bb.1:
-; CHECK-81M-NEXT: .LCPI12_0:
-; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
+; CHECK-NO-MVE-LABEL: h2:
+; CHECK-NO-MVE: @ %bb.0: @ %entry
+; CHECK-NO-MVE-NEXT: push {r7, lr}
+; CHECK-NO-MVE-NEXT: vldr s0, .LCPI12_0
+; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-NO-MVE-NEXT: bic r0, r0, #1
+; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-NO-MVE-NEXT: blxns r0
+; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-NO-MVE-NEXT: pop {r7, pc}
+; CHECK-NO-MVE-NEXT: .p2align 2
+; CHECK-NO-MVE-NEXT: @ %bb.1:
+; CHECK-NO-MVE-NEXT: .LCPI12_0:
+; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-MVE-LABEL: h2:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: push {r7, lr}
+; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
+; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-MVE-NEXT: bic r0, r0, #1
+; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-MVE-NEXT: blxns r0
+; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-MVE-NEXT: pop {r7, pc}
entry:
%call = call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
ret half %call
@@ -994,25 +1006,41 @@ define half @h3(half (half)* nocapture %hptr) nounwind {
; CHECK-8M-NEXT: .LCPI13_0:
; CHECK-8M-NEXT: .long 0x00004900 @ float 2.61874657E-41
;
-; CHECK-81M-LABEL: h3:
-; CHECK-81M: @ %bb.0: @ %entry
-; CHECK-81M-NEXT: push {r7, lr}
-; CHECK-81M-NEXT: vldr s0, .LCPI13_0
-; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
-; CHECK-81M-NEXT: bic r0, r0, #1
-; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
-; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
-; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
-; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
-; CHECK-81M-NEXT: blxns r0
-; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
-; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
-; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
-; CHECK-81M-NEXT: pop {r7, pc}
-; CHECK-81M-NEXT: .p2align 2
-; CHECK-81M-NEXT: @ %bb.1:
-; CHECK-81M-NEXT: .LCPI13_0:
-; CHECK-81M-NEXT: .long 0x00004900 @ float 2.61874657E-41
+; CHECK-NO-MVE-LABEL: h3:
+; CHECK-NO-MVE: @ %bb.0: @ %entry
+; CHECK-NO-MVE-NEXT: push {r7, lr}
+; CHECK-NO-MVE-NEXT: vldr s0, .LCPI13_0
+; CHECK-NO-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-NO-MVE-NEXT: bic r0, r0, #1
+; CHECK-NO-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-NO-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-NO-MVE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-NO-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-NO-MVE-NEXT: blxns r0
+; CHECK-NO-MVE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-NO-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-NO-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-NO-MVE-NEXT: pop {r7, pc}
+; CHECK-NO-MVE-NEXT: .p2align 2
+; CHECK-NO-MVE-NEXT: @ %bb.1:
+; CHECK-NO-MVE-NEXT: .LCPI13_0:
+; CHECK-NO-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
+;
+; CHECK-MVE-LABEL: h3:
+; CHECK-MVE: @ %bb.0: @ %entry
+; CHECK-MVE-NEXT: push {r7, lr}
+; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
+; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-MVE-NEXT: bic r0, r0, #1
+; CHECK-MVE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-MVE-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-MVE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-MVE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-MVE-NEXT: blxns r0
+; CHECK-MVE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-MVE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-MVE-NEXT: pop {r7, pc}
entry:
%call = tail call half %hptr(half 10.0) "cmse_nonsecure_call" nounwind
ret half %call
@@ -1123,7 +1151,7 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vstr fpcxtns, [sp, #-4]!
; CHECK-MVE-NEXT: push {r6, r7, lr}
-; CHECK-MVE-NEXT: vldr s0, .LCPI15_0
+; CHECK-MVE-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-MVE-NEXT: blx r0
; CHECK-MVE-NEXT: vmov.f16 r0, s0
; CHECK-MVE-NEXT: vmov s0, r0
@@ -1132,10 +1160,6 @@ define half @h1_minsize(half (half)* nocapture %hptr) "cmse_nonsecure_entry" min
; CHECK-MVE-NEXT: vldr fpcxtns, [sp], #4
; CHECK-MVE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
; CHECK-MVE-NEXT: bxns lr
-; CHECK-MVE-NEXT: .p2align 2
-; CHECK-MVE-NEXT: @ %bb.1:
-; CHECK-MVE-NEXT: .LCPI15_0:
-; CHECK-MVE-NEXT: .long 0x00004900 @ float 2.61874657E-41
entry:
%call = call half %hptr(half 10.0) nounwind
ret half %call
diff --git a/llvm/test/CodeGen/ARM/fp16-bitcast.ll b/llvm/test/CodeGen/ARM/fp16-bitcast.ll
index ad3dc0a9efbf..997d3603437d 100644
--- a/llvm/test/CodeGen/ARM/fp16-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/fp16-bitcast.ll
@@ -152,12 +152,8 @@ define half @constcall() {
;
; CHECK-FP16-HARD-LABEL: constcall:
; CHECK-FP16-HARD: @ %bb.0: @ %entry
-; CHECK-FP16-HARD-NEXT: vldr s0, .LCPI4_0
+; CHECK-FP16-HARD-NEXT: vmov.f16 s0, #1.000000e+01
; CHECK-FP16-HARD-NEXT: b ccc
-; CHECK-FP16-HARD-NEXT: .p2align 2
-; CHECK-FP16-HARD-NEXT: @ %bb.1:
-; CHECK-FP16-HARD-NEXT: .LCPI4_0:
-; CHECK-FP16-HARD-NEXT: .long 0x00004900 @ float 2.61874657E-41
entry:
%call = tail call fast half @ccc(half 0xH4900)
ret half %call
More information about the llvm-commits
mailing list