[llvm-branch-commits] [llvm] release/21.x: [CodeGen] More consistently expand float ops by default (#150597) (PR #150970)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 28 08:21:41 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-arm
Author: None (llvmbot)
Changes:
Backport fe0dbe0f2950d95071be7140c7b4680f17a7ac4e
Requested by: @nikic
---
Full diff: https://github.com/llvm/llvm-project/pull/150970.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/TargetLoweringBase.cpp (+17-17)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+6-4)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+11)
- (added) llvm/test/CodeGen/PowerPC/froundeven-legalization.ll (+111)
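For context: the backported change makes TargetLoweringBase::initActions() register these floating-point nodes (FROUNDEVEN, FRINT, FTRUNC, FFLOOR, FCEIL, the FLOG/FEXP family, and the trig libcall nodes) as Expand for every value type up front, instead of only for a few scalar types, so targets without native instructions get libcall or unrolled lowering by default. Targets that do have instructions simply mark the nodes Legal again, as the ARM and AMDGPU hunks below do. Below is a minimal sketch of that pattern; MyTargetLowering, MySubtarget, and hasRoundInsts() are hypothetical illustration names, while setOperationAction(), the ISD opcodes, and the Expand/Legal actions are the real hooks this change touches.

```cpp
// Minimal sketch, not part of the patch: how a hypothetical target interacts
// with the new defaults introduced by this change.
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &STI)
    : TargetLowering(TM) {
  // After this patch, TargetLoweringBase::initActions() already marks
  // FROUNDEVEN, FRINT, FTRUNC, FFLOOR, FCEIL, FEXP10, ... as Expand for every
  // value type, so a target with no native instructions needs no extra code
  // to get correct (libcall / unrolled) lowering.

  // A target that does have rounding instructions opts back in per type:
  if (STI.hasRoundInsts())
    setOperationAction({ISD::FROUNDEVEN, ISD::FRINT, ISD::FTRUNC, ISD::FFLOOR,
                        ISD::FCEIL},
                       MVT::f32, Legal);
}
```

The new PowerPC test below exercises exactly the default path: since PowerPC marks no such node Legal for v2f64, the <8 x double> llvm.roundeven call is scalarized into eight roundeven libcalls.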
``````````diff
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6feeb19bb8589..db2065f878727 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -806,7 +806,17 @@ void TargetLoweringBase::initActions() {
ISD::SDIVFIX, ISD::SDIVFIXSAT,
ISD::UDIVFIX, ISD::UDIVFIXSAT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
- ISD::IS_FPCLASS},
+ ISD::IS_FPCLASS, ISD::FCBRT,
+ ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10,
+ ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FCEIL, ISD::FRINT,
+ ISD::FTRUNC, ISD::FROUNDEVEN,
+ ISD::FTAN, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN,
+ ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Overflow operations default to expand
@@ -852,13 +862,12 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector())
- setOperationAction(
- {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG,
- ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
- ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
- ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
- VT, Expand);
+ setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG,
+ ISD::ANY_EXTEND_VECTOR_INREG,
+ ISD::SIGN_EXTEND_VECTOR_INREG,
+ ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR,
+ ISD::LRINT, ISD::LLRINT, ISD::LROUND, ISD::LLROUND},
+ VT, Expand);
// Constrained floating-point operations default to expand.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
@@ -914,15 +923,6 @@ void TargetLoweringBase::initActions() {
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
Expand);
- // These library functions default to expand.
- setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
- ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
- ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FATAN2},
- {MVT::f32, MVT::f64, MVT::f128}, Expand);
-
// Insert custom handling default for llvm.canonicalize.*.
setOperationAction(ISD::FCANONICALIZE,
{MVT::f16, MVT::f32, MVT::f64, MVT::f128}, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3414fe758eff8..7b93382d1281f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -392,8 +392,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
- ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
- MVT::f32, Legal);
+ ISD::FROUNDEVEN, ISD::FTRUNC},
+ {MVT::f16, MVT::f32}, Legal);
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, MVT::f32, Legal);
setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
@@ -413,9 +414,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- if (Subtarget->has16BitInsts())
+ if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
- else {
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Legal);
+ } else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fb72bab03e750..9593038ff2c9b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -370,6 +370,11 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FROUND, VT, Legal);
+ setOperationAction(ISD::FROUNDEVEN, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -1507,6 +1512,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
+ setOperationAction(ISD::FRINT, MVT::f16, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f16, Legal);
}
if (Subtarget->hasNEON()) {
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
new file mode 100644
index 0000000000000..238e200bfc782
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -224(1)
+; CHECK-NEXT: li 5, 48
+; CHECK-NEXT: std 0, 240(1)
+; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 27, 16
+; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 29, 32
+; CHECK-NEXT: li 28, 48
+; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 64
+; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 80
+; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 96
+; CHECK-NEXT: lxvd2x 58, 0, 3
+; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 112
+; CHECK-NEXT: lxvd2x 59, 3, 27
+; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 128
+; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 144
+; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 160
+; CHECK-NEXT: lxvd2x 62, 3, 28
+; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: lxvd2x 63, 3, 29
+; CHECK-NEXT: xxswapd 57, 58
+; CHECK-NEXT: xxswapd 1, 59
+; CHECK-NEXT: xxswapd 60, 62
+; CHECK-NEXT: xxswapd 61, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 56, 1
+; CHECK-NEXT: xxlor 1, 59, 59
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 60, 60
+; CHECK-NEXT: xxmrgld 59, 0, 56
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 60, 1
+; CHECK-NEXT: xxlor 1, 62, 62
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 61, 61
+; CHECK-NEXT: xxmrgld 62, 0, 60
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 63, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 57, 57
+; CHECK-NEXT: xxmrgld 63, 0, 61
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 58, 58
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: li 3, 160
+; CHECK-NEXT: stxvd2x 63, 30, 29
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: stxvd2x 62, 30, 28
+; CHECK-NEXT: stxvd2x 59, 30, 27
+; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 144
+; CHECK-NEXT: xxmrgld 0, 0, 61
+; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 128
+; CHECK-NEXT: stxvd2x 0, 0, 30
+; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 112
+; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 96
+; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 80
+; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 64
+; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 48
+; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 224
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+ %v = load <8 x double>, ptr %p1, align 64
+ %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
+ store <8 x double> %res, ptr %p2, align 64
+ ret void
+}
+
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
``````````
https://github.com/llvm/llvm-project/pull/150970
More information about the llvm-branch-commits mailing list