[llvm] c37c9f2 - [DAG] Restrict (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) combine
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 18 10:37:45 PDT 2023
Author: Philip Reames
Date: 2023-04-18T10:37:18-07:00
New Revision: c37c9f2fda27b11cb405dbf458b9128632a75834
URL: https://github.com/llvm/llvm-project/commit/c37c9f2fda27b11cb405dbf458b9128632a75834
DIFF: https://github.com/llvm/llvm-project/commit/c37c9f2fda27b11cb405dbf458b9128632a75834.diff
LOG: [DAG] Restrict (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) combine
This transformation creates an copysign node whose argument types do not match. RISCV does not handle such a case which results in a crash today. Looking at the relevant code in DAG, it looks like the process of enabling the non-matching types case was never completed for vectors at all. The transformation which triggered the RISCV crash is a specialization of another transform (specifically due to one use for profitability) which isn't enabled by default. Given that, I chose to match the preconditions for that other transform.
Other options here include:
* Updating RISCV codegen to handle the mismatched argument type case for vectors. This is slightly tricky as I don't see an obvious profitable lowering for this case which doesn't involve simply adding back in the round/trunc.
* Disabling the transform via a target hook.
This patch does involve two changes for AArch64 codegen. These could be called regressions, but well, the code after actually looks better than the code before.
Differential Revision: https://reviews.llvm.org/D148638
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/sve-fcopysign.ll
llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ddb91adf61640..7f14ce8426174 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16611,27 +16611,30 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
- SDValue N1 = N->getOperand(1);
- if ((N1.getOpcode() == ISD::FP_EXTEND ||
- N1.getOpcode() == ISD::FP_ROUND)) {
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0).getValueType();
+/// Operands to the functions are the type of X and Y respectively.
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
+ // Always fold no-op FP casts.
+ if (XTy == YTy)
+ return true;
- // Always fold no-op FP casts.
- if (N1VT == N1Op0VT)
- return true;
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ if (YTy == MVT::f128)
+ return false;
- // Do not optimize out type conversion of f128 type yet.
- // For some targets like x86_64, configuration is changed to keep one f128
- // value in one SSE register, but instruction selection cannot handle
- // FCOPYSIGN on SSE registers yet.
- if (N1Op0VT == MVT::f128)
- return false;
+ return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+}
- return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
- }
- return false;
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::FP_EXTEND &&
+ N1.getOpcode() != ISD::FP_ROUND)
+ return false;
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+ return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
@@ -16991,7 +16994,13 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
+ // Note: From a legality perspective, this is a two step transform. First,
+ // we duplicate the fp_round to the arguments of the copysign, then we
+ // eliminate the fp_round on Y. The second step requires an additional
+ // predicate to match the implementation above.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
+ N0.getValueType())) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
index aefcdbb46680b..65f1055ffafc0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
@@ -259,30 +259,48 @@ define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <v
;========== FCOPYSIGN_EXTEND_ROUND
define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: test_copysign_nxv4f32_nxv4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
-; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-NEXT: and z0.h, z0.h, #0x7fff
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
+; CHECK-NO-EXTEND-ROUND: // %bb.0:
+; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
+; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
+; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NO-EXTEND-ROUND-NEXT: ret
+;
+; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
+; CHECK-EXTEND-ROUND: // %bb.0:
+; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
+; CHECK-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
+; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
+; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
+; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-EXTEND-ROUND-NEXT: ret
%t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
%t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
ret <vscale x 4 x half> %t2
}
define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: test_copysign_nxv2f64_nxv2f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
+; CHECK-NO-EXTEND-ROUND: // %bb.0:
+; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
+; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
+; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
+; CHECK-NO-EXTEND-ROUND-NEXT: ret
+;
+; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
+; CHECK-EXTEND-ROUND: // %bb.0:
+; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
+; CHECK-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
+; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
+; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
+; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
+; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-EXTEND-ROUND-NEXT: ret
%t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
%t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
ret <vscale x 2 x float> %t2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index eb4e5954515f6..0f4ace490480a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -1431,3 +1431,16 @@ define <vscale x 8 x double> @vfcopynsign_exttrunc_vf_nxv8f64_nxv8f32(<vscale x
%r = call <vscale x 8 x double> @llvm.copysign.nxv8f64(<vscale x 8 x double> %vm, <vscale x 8 x double> %eneg)
ret <vscale x 8 x double> %r
}
+
+define <vscale x 2 x float> @fptrunc_of_copysign_nxv2f32_nxv2f64(<vscale x 2 x double> %X, <vscale x 2 x double> %Y) {
+; CHECK-LABEL: fptrunc_of_copysign_nxv2f32_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v10, v8, v10
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfncvt.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %copy = call fast <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %X, <vscale x 2 x double> %Y)
+ %trunc = fptrunc <vscale x 2 x double> %copy to <vscale x 2 x float>
+ ret <vscale x 2 x float> %trunc
+}
More information about the llvm-commits
mailing list