[llvm] fe0dbe0 - [CodeGen] More consistently expand float ops by default (#150597)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 00:46:04 PDT 2025
Author: Nikita Popov
Date: 2025-07-28T09:46:00+02:00
New Revision: fe0dbe0f2950d95071be7140c7b4680f17a7ac4e
URL: https://github.com/llvm/llvm-project/commit/fe0dbe0f2950d95071be7140c7b4680f17a7ac4e
DIFF: https://github.com/llvm/llvm-project/commit/fe0dbe0f2950d95071be7140c7b4680f17a7ac4e.diff
LOG: [CodeGen] More consistently expand float ops by default (#150597)
These float operations were expanded for scalar f32/f64/f128, but not
for f16 and more problematically, not for vectors. A small subset of
them was separately set to expand for vectors.
Change these to always expand by default, and adjust targets to mark
these as legal where necessary instead.
This is a much safer default, and avoids unnecessary legalization
failures because a target failed to manually mark them as expand.
Fixes https://github.com/llvm/llvm-project/issues/110753.
Fixes https://github.com/llvm/llvm-project/issues/121390.
Added:
llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
Modified:
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index d4a34555ed820..68b8a0044d6ee 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
ISD::SDIVFIX, ISD::SDIVFIXSAT,
ISD::UDIVFIX, ISD::UDIVFIXSAT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FCBRT,
+ ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10,
+ ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FCEIL, ISD::FRINT,
+ ISD::FTRUNC, ISD::FROUNDEVEN,
+ ISD::FTAN, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN,
+ ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
@@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector())
- setOperationAction(
- {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
- ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
- ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
- VT, Expand);
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
+ ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
Expand);
- // These library functions default to expand.
- setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
- ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
- ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FATAN2},
- {MVT::f32, MVT::f64, MVT::f128}, Expand);
-
// Insert custom handling default for llvm.canonicalize.*.
setOperationAction(ISD::FCANONICALIZE,
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e3ca09e512b3b..f25ce8723a2dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -391,8 +391,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
- ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
- MVT::f32, Legal);
+ ISD::FROUNDEVEN, ISD::FTRUNC},
+ {MVT::f16, MVT::f32}, Legal);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -412,9 +413,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- if (Subtarget->has16BitInsts())
+ if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
- else {
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
+ } else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fca5dff092306..066b392213fa9 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
+ setOperationAction(ISD::FRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
new file mode 100644
index 0000000000000..238e200bfc782
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -224(1)
+; CHECK-NEXT: li 5, 48
+; CHECK-NEXT: std 0, 240(1)
+; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 27, 16
+; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 29, 32
+; CHECK-NEXT: li 28, 48
+; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 64
+; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 80
+; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 96
+; CHECK-NEXT: lxvd2x 58, 0, 3
+; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 112
+; CHECK-NEXT: lxvd2x 59, 3, 27
+; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 128
+; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 144
+; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 160
+; CHECK-NEXT: lxvd2x 62, 3, 28
+; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: lxvd2x 63, 3, 29
+; CHECK-NEXT: xxswapd 57, 58
+; CHECK-NEXT: xxswapd 1, 59
+; CHECK-NEXT: xxswapd 60, 62
+; CHECK-NEXT: xxswapd 61, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 56, 1
+; CHECK-NEXT: xxlor 1, 59, 59
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 60, 60
+; CHECK-NEXT: xxmrgld 59, 0, 56
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 60, 1
+; CHECK-NEXT: xxlor 1, 62, 62
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 61, 61
+; CHECK-NEXT: xxmrgld 62, 0, 60
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 63, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 57, 57
+; CHECK-NEXT: xxmrgld 63, 0, 61
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 58, 58
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: li 3, 160
+; CHECK-NEXT: stxvd2x 63, 30, 29
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: stxvd2x 62, 30, 28
+; CHECK-NEXT: stxvd2x 59, 30, 27
+; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 144
+; CHECK-NEXT: xxmrgld 0, 0, 61
+; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 128
+; CHECK-NEXT: stxvd2x 0, 0, 30
+; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 112
+; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 96
+; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 80
+; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 64
+; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 48
+; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 224
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+ %v = load <8 x double>, ptr %p1, align 64
+ %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
+ store <8 x double> %res, ptr %p2, align 64
+ ret void
+}
+
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
More information about the llvm-commits
mailing list