[llvm] 512ab99 - [DAG] foldConstantFPMath - fold vector splats as well as scalar constants

Fri Dec 17 07:19:35 PST 2021

Author: Simon Pilgrim
Date: 2021-12-17T15:19:26Z
New Revision: 512ab9968d5f7a655f797e542b8bc9c48674c0d7

URL: https://github.com/llvm/llvm-project/commit/512ab9968d5f7a655f797e542b8bc9c48674c0d7
DIFF: https://github.com/llvm/llvm-project/commit/512ab9968d5f7a655f797e542b8bc9c48674c0d7.diff

LOG: [DAG] foldConstantFPMath - fold vector splats as well as scalar constants

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index baa8e641a5fd..450a0fe06878 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5480,8 +5480,8 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
   //       should. That will require dealing with a potentially non-default
   //       rounding mode, checking the "opStatus" return value from the APFloat
   //       math calculations, and possibly other variations.
-  auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
-  auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false);
+  ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false);
   if (N1CFP && N2CFP) {
     APFloat C1 = N1CFP->getValueAPF(); // make copy
     const APFloat &C2 = N2CFP->getValueAPF();
@@ -5520,8 +5520,9 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
   switch (Opcode) {
   case ISD::FSUB:
     // -0.0 - undef --> undef (consistent with "fneg undef")
-    if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef())
-      return getUNDEF(VT);
+    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true))
+      if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef())
+        return getUNDEF(VT);
     LLVM_FALLTHROUGH;
 
   case ISD::FADD:

diff  --git a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
index b582d42c4131..5c542c4ca79a 100644
--- a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
+++ b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
@@ -96,11 +96,8 @@ entry:
 define void @test_concat_fptrunc_v4f64_to_v4f32(<vscale x 4 x float>* %ptr) #1 {
 ; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov z0.d, #1.00000000
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
+; CHECK-NEXT:    fmov z0.s, #1.00000000
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
 entry: