[llvm] [LegalizeVectorOps][PowerPC] Use xor to expand fneg. (PR #106595)

Thu Aug 29 10:52:24 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

This preserves the semantics of fneg and matches what we do in LegalizeDAG.

I kept the legal FSUB check to force unrolling for some targets that don't have FSUB but have XOR. On AArch64, using XOR broke some tests that expected to see a (v1f64 (fma (insert_vector_elt (f64 (fneg (extract_vector_elt X)))))) pattern.

---
Full diff: https://github.com/llvm/llvm-project/pull/106595.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+11-5) 
- (modified) llvm/test/CodeGen/PowerPC/fma-negate.ll (+1-1) 
- (modified) llvm/test/CodeGen/PowerPC/fp-strict.ll (+4-4) 
- (modified) llvm/test/CodeGen/PowerPC/vec_abs.ll (+1-1) 
- (modified) llvm/test/CodeGen/PowerPC/vec_fneg.ll (+1-1) 


``````````diff

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3f104baed97b1a..2557fa288606e7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1669,12 +1669,18 @@ void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
 }
 
 SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
-  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+  if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+      TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
     SDLoc DL(Node);
-    SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
-    // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
-    return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
-                       Node->getOperand(0));
+    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+    SDValue SignMask = DAG.getConstant(
+        APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+    SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
+    return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
   }
   return DAG.UnrollVectorOp(Node);
 }
diff --git a/llvm/test/CodeGen/PowerPC/fma-negate.ll b/llvm/test/CodeGen/PowerPC/fma-negate.ll
index 22118c44ece706..1f8e0968ca98ea 100644
--- a/llvm/test/CodeGen/PowerPC/fma-negate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-negate.ll
@@ -155,7 +155,7 @@ define <4 x float> @test_neg_fma_v4f32(<4 x float> %a, <4 x float> %b,
 ; NO-VSX:       # %bb.0: # %entry
 ; NO-VSX-NEXT:    vspltisb 5, -1
 ; NO-VSX-NEXT:    vslw 5, 5, 5
-; NO-VSX-NEXT:    vsubfp 2, 5, 2
+; NO-VSX-NEXT:    vxor 2, 2, 5
 ; NO-VSX-NEXT:    vmaddfp 2, 2, 3, 4
 ; NO-VSX-NEXT:    blr
                                        <4 x float> %c) {
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict.ll b/llvm/test/CodeGen/PowerPC/fp-strict.ll
index 124c588ba242c1..d3025f1da658af 100644
--- a/llvm/test/CodeGen/PowerPC/fp-strict.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict.ll
@@ -915,7 +915,7 @@ define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-NEXT:    vslw v5, v5, v5
 ; NOVSX-NEXT:    stvx v3, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -64
-; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    vxor v4, v4, v5
 ; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -32
 ; NOVSX-NEXT:    stvx v4, 0, r3
@@ -1213,7 +1213,7 @@ define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-NEXT:    fmadds f0, f2, f1, f0
 ; NOVSX-NEXT:    stfs f0, -16(r1)
 ; NOVSX-NEXT:    lvx v2, 0, r3
-; NOVSX-NEXT:    vsubfp v2, v3, v2
+; NOVSX-NEXT:    vxor v2, v2, v3
 ; NOVSX-NEXT:    blr
 ;
 ; SPE-LABEL: fnmadd_v4f32:
@@ -1462,7 +1462,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-NEXT:    vslw v5, v5, v5
 ; NOVSX-NEXT:    stvx v3, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -64
-; NOVSX-NEXT:    vsubfp v4, v5, v4
+; NOVSX-NEXT:    vxor v4, v4, v5
 ; NOVSX-NEXT:    stvx v2, 0, r3
 ; NOVSX-NEXT:    addi r3, r1, -32
 ; NOVSX-NEXT:    stvx v4, 0, r3
@@ -1488,7 +1488,7 @@ define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float>
 ; NOVSX-NEXT:    fmadds f0, f1, f0, f2
 ; NOVSX-NEXT:    stfs f0, -16(r1)
 ; NOVSX-NEXT:    lvx v2, 0, r3
-; NOVSX-NEXT:    vsubfp v2, v5, v2
+; NOVSX-NEXT:    vxor v2, v2, v5
 ; NOVSX-NEXT:    blr
 ;
 ; SPE-LABEL: fnmsub_v4f32:
diff --git a/llvm/test/CodeGen/PowerPC/vec_abs.ll b/llvm/test/CodeGen/PowerPC/vec_abs.ll
index f7ff18f3ce1790..50dcfc3faf62e9 100644
--- a/llvm/test/CodeGen/PowerPC/vec_abs.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_abs.ll
@@ -44,7 +44,7 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 {
 ; CHECK-NOVSX: fabs
 ; CHECK-NOVSX: fabs
 ; CHECK-NOVSX: fabs
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
 ; CHECK-NOVSX: blr
 
 define <2 x double> @test_double(<2 x double> %aa) #0 {
diff --git a/llvm/test/CodeGen/PowerPC/vec_fneg.ll b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
index 2854a31cad9e17..bbbdd45cbb01ac 100644
--- a/llvm/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
@@ -15,7 +15,7 @@ define void @test_float(ptr %A) {
 
 ; CHECK: xvnegsp
 ; CHECK: blr
-; CHECK-NOVSX: vsubfp
+; CHECK-NOVSX: vxor
 ; CHECK-NOVSX: blr
 
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/106595