[llvm] ee7a006 - [RISCV] Promote f16 ceil/floor/round/roundeven/nearbyint/rint/trunc intrinsics to f32 libcalls.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Thu Nov 11 08:32:44 PST 2021
Author: Craig Topper
Date: 2021-11-11T08:28:41-08:00
New Revision: ee7a006ce461740ea2aeb2c17d80a0ff7667f603
URL: https://github.com/llvm/llvm-project/commit/ee7a006ce461740ea2aeb2c17d80a0ff7667f603
DIFF: https://github.com/llvm/llvm-project/commit/ee7a006ce461740ea2aeb2c17d80a0ff7667f603.diff
LOG: [RISCV] Promote f16 ceil/floor/round/roundeven/nearbyint/rint/trunc intrinsics to f32 libcalls.
Previously these would crash. I don't think these can be generated
directly from C. Not sure if any optimizations can introduce them.
Reviewed By: asb
Differential Revision: https://reviews.llvm.org/D113527
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/half-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c5ad971f96cf2..e2565433f156a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -338,7 +338,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f16, Expand);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
}
if (Subtarget.hasStdExtF()) {
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 32015aec26dd7..89d553f1b4f2c 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -194,6 +194,356 @@ define half @copysign_f16(half %a, half %b) nounwind {
ret half %1
}
+declare half @llvm.floor.f16(half)
+
+define half @floor_f16(half %a) nounwind {
+; RV32IZFH-LABEL: floor_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call floorf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: floor_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call floorf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: floor_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call floorf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: floor_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call floorf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.floor.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.ceil.f16(half)
+
+define half @ceil_f16(half %a) nounwind {
+; RV32IZFH-LABEL: ceil_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call ceilf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: ceil_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call ceilf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: ceil_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call ceilf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: ceil_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call ceilf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.ceil.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.trunc.f16(half)
+
+define half @trunc_f16(half %a) nounwind {
+; RV32IZFH-LABEL: trunc_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call truncf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: trunc_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call truncf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: trunc_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call truncf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: trunc_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call truncf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.trunc.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.rint.f16(half)
+
+define half @rint_f16(half %a) nounwind {
+; RV32IZFH-LABEL: rint_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call rintf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: rint_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call rintf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: rint_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call rintf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: rint_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call rintf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.rint.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.nearbyint.f16(half)
+
+define half @nearbyint_f16(half %a) nounwind {
+; RV32IZFH-LABEL: nearbyint_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call nearbyintf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: nearbyint_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call nearbyintf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: nearbyint_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call nearbyintf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: nearbyint_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call nearbyintf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.nearbyint.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.round.f16(half)
+
+define half @round_f16(half %a) nounwind {
+; RV32IZFH-LABEL: round_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call roundf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: round_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call roundf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: round_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call roundf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: round_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call roundf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.round.f16(half %a)
+ ret half %1
+}
+
+declare half @llvm.roundeven.f16(half)
+
+define half @roundeven_f16(half %a) nounwind {
+; RV32IZFH-LABEL: roundeven_f16:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: addi sp, sp, -16
+; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT: call roundevenf@plt
+; RV32IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: ret
+;
+; RV32IDZFH-LABEL: roundeven_f16:
+; RV32IDZFH: # %bb.0:
+; RV32IDZFH-NEXT: addi sp, sp, -16
+; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV32IDZFH-NEXT: call roundevenf@plt
+; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: roundeven_f16:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: addi sp, sp, -16
+; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IZFH-NEXT: call roundevenf@plt
+; RV64IZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT: addi sp, sp, 16
+; RV64IZFH-NEXT: ret
+;
+; RV64IDZFH-LABEL: roundeven_f16:
+; RV64IDZFH: # %bb.0:
+; RV64IDZFH-NEXT: addi sp, sp, -16
+; RV64IDZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0
+; RV64IDZFH-NEXT: call roundevenf@plt
+; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0
+; RV64IDZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IDZFH-NEXT: addi sp, sp, 16
+; RV64IDZFH-NEXT: ret
+ %1 = call half @llvm.roundeven.f16(half %a)
+ ret half %1
+}
+
declare iXLen @llvm.lrint.iXLen.f16(float)
define iXLen @lrint_f16(float %a) nounwind {
More information about the llvm-commits
mailing list