[llvm] [LegalizeVectorOps][PowerPC] Use xor to expand fneg. (PR #106595)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 10:52:24 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
This preserves the semantics of fneg and matches what we do in LegalizeDAG.
I kept the legal FSUB check to force unrolling for some targets that don't have FSUB but have XOR. On AArch64 using xor broke some tests that expected to see a (v1f64 (fma (insertvector_elt (f64 (fneg (extractvectorelt X)))))) pattern.
---
Full diff: https://github.com/llvm/llvm-project/pull/106595.diff
5 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+11-5)
- (modified) llvm/test/CodeGen/PowerPC/fma-negate.ll (+1-1)
- (modified) llvm/test/CodeGen/PowerPC/fp-strict.ll (+4-4)
- (modified) llvm/test/CodeGen/PowerPC/vec_abs.ll (+1-1)
- (modified) llvm/test/CodeGen/PowerPC/vec_fneg.ll (+1-1)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3f104baed97b1a..2557fa288606e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1669,12 +1669,18 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
}
SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
- if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+ if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
SDLoc DL(Node);
- SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
- // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
- return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
- Node->getOperand(0));
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
}
return DAG.UnrollVectorOp(Node);
}
diff --git a/llvm/test/CodeGen/PowerPC/fma-negate.ll b/llvm/test/CodeGen/PowerPC/fma-negate.ll
index 22118c44ece706..1f8e0968ca98ea 100644
--- a/llvm/test/CodeGen/PowerPC/fma-negate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-negate.ll
@@ -155,7 +155,7 @@ define <4 x float> @test_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
; NO-VSX: # %bb.0: # %entry
; NO-VSX-NEXT: vspltisb 5, -1
; NO-VSX-NEXT: vslw 5, 5, 5
-; NO-VSX-NEXT: vsubfp 2, 5, 2
+; NO-VSX-NEXT: vxor 2, 2, 5
; NO-VSX-NEXT: vmaddfp 2, 2, 3, 4
; NO-VSX-NEXT: blr
<4 x float> %c) {
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index 124c588ba242c1..d3025f1da658af 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -915,7 +915,7 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
-; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: vxor v4, v4, v5
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v4, 0, r3
@@ -1213,7 +1213,7 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: fmadds f0, f2, f1, f0
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
-; NOVSX-NEXT: vsubfp v2, v3, v2
+; NOVSX-NEXT: vxor v2, v2, v3
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmadd_v4f32:
@@ -1462,7 +1462,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: vslw v5, v5, v5
; NOVSX-NEXT: stvx v3, 0, r3
; NOVSX-NEXT: addi r3, r1, -64
-; NOVSX-NEXT: vsubfp v4, v5, v4
+; NOVSX-NEXT: vxor v4, v4, v5
; NOVSX-NEXT: stvx v2, 0, r3
; NOVSX-NEXT: addi r3, r1, -32
; NOVSX-NEXT: stvx v4, 0, r3
@@ -1488,7 +1488,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
; NOVSX-NEXT: fmadds f0, f1, f0, f2
; NOVSX-NEXT: stfs f0, -16(r1)
; NOVSX-NEXT: lvx v2, 0, r3
-; NOVSX-NEXT: vsubfp v2, v5, v2
+; NOVSX-NEXT: vxor v2, v2, v5
; NOVSX-NEXT: blr
;
; SPE-LABEL: fnmsub_v4f32:
diff --git a/llvm/test/CodeGen/PowerPC/vec_abs.ll b/llvm/test/CodeGen/PowerPC/vec_abs.ll
index f7ff18f3ce1790..50dcfc3faf62e9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_abs.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_abs.ll
@@ -44,7 +44,7 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 {
; CHECK-NOVSX: fabs
; CHECK-NOVSX: fabs
; CHECK-NOVSX: fabs
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
; CHECK-NOVSX: blr
define <2 x double> @test_double(<2 x double> %aa) #0 {
diff --git a/llvm/test/CodeGen/PowerPC/vec_fneg.ll b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
index 2854a31cad9e17..bbbdd45cbb01ac 100644
--- a/llvm/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
@@ -15,7 +15,7 @@ define void @test_float(ptr %A) {
; CHECK: xvnegsp
; CHECK: blr
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
; CHECK-NOVSX: blr
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/106595
More information about the llvm-commits
mailing list