[llvm] 3d6f18d - [AArch64] Remove redundant FDIV Combine. (#91924)

Tue May 14 08:28:32 PDT 2024

Author: Sander de Smalen
Date: 2024-05-14T16:28:28+01:00
New Revision: 3d6f18db7b5bbf85bdd40c7c7d627baff2802b7c

URL: https://github.com/llvm/llvm-project/commit/3d6f18db7b5bbf85bdd40c7c7d627baff2802b7c
DIFF: https://github.com/llvm/llvm-project/commit/3d6f18db7b5bbf85bdd40c7c7d627baff2802b7c.diff

LOG: [AArch64] Remove redundant FDIV Combine. (#91924)

The target combine is no longer required because InstCombine will
transform an FDIV by a power of 2 into a multiply (FMUL), so in practice
this case will never trigger.

Additionally, the generated code would have been incorrect for
streaming(-compatible) functions, because it assumed NEON was available.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Removed: 
    llvm/test/CodeGen/AArch64/fdiv_combine.ll
    llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f6d80f78910cf..2ec9f66214b65 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1040,7 +1040,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                        ISD::UINT_TO_FP});
 
   setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
-                       ISD::FP_TO_UINT_SAT, ISD::FADD, ISD::FDIV});
+                       ISD::FP_TO_UINT_SAT, ISD::FADD});
 
   // Try and combine setcc with csel
   setTargetDAGCombine(ISD::SETCC);
@@ -17963,75 +17963,6 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
   return FixConv;
 }
 
-/// Fold a floating-point divide by power of two into fixed-point to
-/// floating-point conversion.
-static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
-                                  TargetLowering::DAGCombinerInfo &DCI,
-                                  const AArch64Subtarget *Subtarget) {
-  if (!Subtarget->hasNEON())
-    return SDValue();
-
-  SDValue Op = N->getOperand(0);
-  unsigned Opc = Op->getOpcode();
-  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
-      !Op.getOperand(0).getValueType().isSimple() ||
-      (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
-    return SDValue();
-
-  SDValue ConstVec = N->getOperand(1);
-  if (!isa<BuildVectorSDNode>(ConstVec))
-    return SDValue();
-
-  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
-  int32_t IntBits = IntTy.getSizeInBits();
-  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
-    return SDValue();
-
-  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
-  int32_t FloatBits = FloatTy.getSizeInBits();
-  if (FloatBits != 32 && FloatBits != 64)
-    return SDValue();
-
-  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
-  if (IntBits > FloatBits)
-    return SDValue();
-
-  BitVector UndefElements;
-  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
-  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
-  if (C == -1 || C == 0 || C > FloatBits)
-    return SDValue();
-
-  MVT ResTy;
-  unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  switch (NumLanes) {
-  default:
-    return SDValue();
-  case 2:
-    ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
-    break;
-  case 4:
-    ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
-    break;
-  }
-
-  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  SDLoc DL(N);
-  SDValue ConvInput = Op.getOperand(0);
-  bool IsSigned = Opc == ISD::SINT_TO_FP;
-  if (IntBits < FloatBits)
-    ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
-                            ResTy, ConvInput);
-
-  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
-                                      : Intrinsic::aarch64_neon_vcvtfxu2fp;
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
-                     DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
-                     DAG.getConstant(C, DL, MVT::i32));
-}
-
 static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const AArch64TargetLowering &TLI) {
   EVT VT = N->getValueType(0);
@@ -24720,8 +24651,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
     return performFpToIntCombine(N, DAG, DCI, Subtarget);
-  case ISD::FDIV:
-    return performFDivCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget, *this);
   case ISD::AND:

diff  --git a/llvm/test/CodeGen/AArch64/fdiv_combine.ll b/llvm/test/CodeGen/AArch64/fdiv_combine.ll
deleted file mode 100644
index 10b5f4386dd56..0000000000000
--- a/llvm/test/CodeGen/AArch64/fdiv_combine.ll
+++ /dev/null
@@ -1,126 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
-
-; Test signed conversion.
-define <2 x float> @test1(<2 x i32> %in) {
-; CHECK-LABEL: test1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf.2s v0, v0, #4
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
-  %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
-  ret <2 x float> %div.i
-}
-
-; Test unsigned conversion.
-define <2 x float> @test2(<2 x i32> %in) {
-; CHECK-LABEL: test2:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ucvtf.2s v0, v0, #3
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = uitofp <2 x i32> %in to <2 x float>
-  %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
-  ret <2 x float> %div.i
-}
-
-; Test which should not fold due to non-power of 2.
-define <2 x float> @test3(<2 x i32> %in) {
-; CHECK-LABEL: test3:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov.2s v1, #9.00000000
-; CHECK-NEXT:    scvtf.2s v0, v0
-; CHECK-NEXT:    fdiv.2s v0, v0, v1
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
-  %div.i = fdiv <2 x float> %vcvt.i, <float 9.0, float 9.0>
-  ret <2 x float> %div.i
-}
-
-; Test which should not fold due to power of 2 out of range.
-define <2 x float> @test4(<2 x i32> %in) {
-; CHECK-LABEL: test4:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi.2s v1, #80, lsl #24
-; CHECK-NEXT:    scvtf.2s v0, v0
-; CHECK-NEXT:    fdiv.2s v0, v0, v1
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
-  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
-  ret <2 x float> %div.i
-}
-
-; Test case where const is max power of 2 (i.e., 2^32).
-define <2 x float> @test5(<2 x i32> %in) {
-; CHECK-LABEL: test5:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf.2s v0, v0, #32
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
-  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
-  ret <2 x float> %div.i
-}
-
-; Test quadword.
-define <4 x float> @test6(<4 x i32> %in) {
-; CHECK-LABEL: test6:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf.4s v0, v0, #2
-; CHECK-NEXT:    ret
-entry:
-  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
-  %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
-  ret <4 x float> %div.i
-}
-
-; Test unsigned i16 to float
-define <4 x float> @test7(<4 x i16> %in) {
-; CHECK-LABEL: test7:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushll.4s v0, v0, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0, #1
-; CHECK-NEXT:    ret
-  %conv = uitofp <4 x i16> %in to <4 x float>
-  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
-  ret <4 x float> %shift
-}
-
-; Test signed i16 to float
-define <4 x float> @test8(<4 x i16> %in) {
-; CHECK-LABEL: test8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    scvtf.4s v0, v0, #2
-; CHECK-NEXT:    ret
-  %conv = sitofp <4 x i16> %in to <4 x float>
-  %shift = fdiv <4 x float> %conv, <float 4.0, float 4.0, float 4.0, float 4.0>
-  ret <4 x float> %shift
-}
-
-; Can't convert i64 to float.
-define <2 x float> @test9(<2 x i64> %in) {
-; CHECK-LABEL: test9:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf.2d v0, v0
-; CHECK-NEXT:    movi.2s v1, #64, lsl #24
-; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fdiv.2s v0, v0, v1
-; CHECK-NEXT:    ret
-  %conv = uitofp <2 x i64> %in to <2 x float>
-  %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
-  ret <2 x float> %shift
-}
-
-define <2 x double> @test10(<2 x i64> %in) {
-; CHECK-LABEL: test10:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf.2d v0, v0, #1
-; CHECK-NEXT:    ret
-  %conv = uitofp <2 x i64> %in to <2 x double>
-  %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
-  ret <2 x double> %shift
-}

diff  --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll
deleted file mode 100644
index 5a5a669e92eeb..0000000000000
--- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s
-
-define <16 x double> @test_sitofp_fixed(<16 x i32> %in) {
-; CHECK-LABEL: test_sitofp_fixed:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    sshll2.2d v4, v0, #0
-; CHECK-NEXT:    sshll.2d v0, v0, #0
-; CHECK-NEXT:    sshll2.2d v5, v1, #0
-; CHECK-NEXT:    sshll.2d v6, v1, #0
-; CHECK-NEXT:    sshll.2d v7, v2, #0
-; CHECK-NEXT:    sshll2.2d v16, v2, #0
-; CHECK-NEXT:    sshll2.2d v17, v3, #0
-; CHECK-NEXT:    sshll.2d v18, v3, #0
-; CHECK-NEXT:    scvtf.2d v1, v4, #6
-; CHECK-NEXT:    scvtf.2d v0, v0, #6
-; CHECK-NEXT:    scvtf.2d v3, v5, #6
-; CHECK-NEXT:    scvtf.2d v2, v6, #6
-; CHECK-NEXT:    scvtf.2d v4, v7, #6
-; CHECK-NEXT:    scvtf.2d v5, v16, #6
-; CHECK-NEXT:    scvtf.2d v7, v17, #6
-; CHECK-NEXT:    scvtf.2d v6, v18, #6
-; CHECK-NEXT:    ret
-
-  %flt = sitofp <16 x i32> %in to <16 x double>
-  %res = fdiv <16 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0, double 64.0>
-  ret <16 x double> %res
-}
-
-; This one is small enough to satisfy isSimple, but still illegally large.
-define <4 x double> @test_sitofp_fixed_shortish(<4 x i64> %in) {
-; CHECK-LABEL: test_sitofp_fixed_shortish:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    scvtf.2d v0, v0, #6
-; CHECK-NEXT:    scvtf.2d v1, v1, #6
-; CHECK-NEXT:    ret
-
-
-  %flt = sitofp <4 x i64> %in to <4 x double>
-  %res = fdiv <4 x double> %flt, <double 64.0, double 64.0, double 64.0, double 64.0>
-  ret <4 x double> %res
-}