[llvm] 9f8dcb0 - [AArch64] Try to detect patterns with fdiv and fmul for [su]cvtf.

Tue Aug 15 02:59:01 PDT 2023

Author: Jingu Kang
Date: 2023-08-15T10:57:07+01:00
New Revision: 9f8dcb070655b4914d47848dcfbba742f12b25bd

URL: https://github.com/llvm/llvm-project/commit/9f8dcb070655b4914d47848dcfbba742f12b25bd
DIFF: https://github.com/llvm/llvm-project/commit/9f8dcb070655b4914d47848dcfbba742f12b25bd.diff

LOG: [AArch64] Try to detect patterns with fdiv and fmul for [su]cvtf.

If fmul's constant operand is the reciprocal of a power of 2 (i.e. 1/2^n) or
fdiv's constant operand is a power of 2, we can try to match patterns with
[su]int_to_fp for [su]cvtf.

Differential Revision: https://reviews.llvm.org/D156538

Added: 
    llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c01b0eaff94b9a..479067d2fb6a40 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -462,6 +462,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
 
+  template<unsigned RegWidth>
+  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
+    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
+  }
+
+  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
+                                     unsigned Width);
+
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -3625,9 +3633,10 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
   return true;
 }
 
-bool
-AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
-                                              unsigned RegWidth) {
+static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
+                                               SDValue &FixedPos,
+                                               unsigned RegWidth,
+                                               bool isReciprocal) {
   APFloat FVal(0.0);
   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
     FVal = CN->getValueAPF();
@@ -3652,13 +3661,18 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
   // integers.
   bool IsExact;
 
+  if (isReciprocal)
+    if (!FVal.getExactInverse(&FVal))
+      return false;
+
   // fbits is between 1 and 64 in the worst-case, which means the fmul
   // could have 2^64 as an actual operand. Need 65 bits of precision.
   APSInt IntVal(65, true);
   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
 
   // N.b. isPowerOf2 also checks for > 0.
-  if (!IsExact || !IntVal.isPowerOf2()) return false;
+  if (!IsExact || !IntVal.isPowerOf2())
+    return false;
   unsigned FBits = IntVal.logBase2();
 
   // Checks above should have guaranteed that we haven't lost information in
@@ -3669,6 +3683,19 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
   return true;
 }
 
+bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+                                                   unsigned RegWidth) {
+  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+                                            false);
+}
+
+bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
+                                                        SDValue &FixedPos,
+                                                        unsigned RegWidth) {
+  return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+                                            true);
+}
+
 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
 // of the string and obtains the integer values from them and combines these
 // into a single value to be used in the MRS/MSR instruction.

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ac7b208e3f075c..9eccfedd9b6529 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -703,6 +703,28 @@ def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
 def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
 def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
 
+class fixedpoint_recip_i32<ValueType FloatVT>
+  : Operand<FloatVT>,
+    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<32>", [fpimm, ld]> {
+  let EncoderMethod = "getFixedPointScaleOpValue";
+  let DecoderMethod = "DecodeFixedPointScaleImm32";
+}
+
+class fixedpoint_recip_i64<ValueType FloatVT>
+  : Operand<FloatVT>,
+    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<64>", [fpimm, ld]> {
+  let EncoderMethod = "getFixedPointScaleOpValue";
+  let DecoderMethod = "DecodeFixedPointScaleImm64";
+}
+
+def fixedpoint_recip_f16_i32 : fixedpoint_recip_i32<f16>;
+def fixedpoint_recip_f32_i32 : fixedpoint_recip_i32<f32>;
+def fixedpoint_recip_f64_i32 : fixedpoint_recip_i32<f64>;
+
+def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
+def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
+def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
+
 def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
   return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
 }]> {
@@ -4984,55 +5006,55 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
   }
 
   // Scaled
-  def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+  def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
                              [(set (f16 FPR16:$Rd),
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f16_i32:$scale))]> {
+                                   (fmul (node GPR32:$Rn),
+                                         fixedpoint_recip_f16_i32:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let scale{5} = 1;
     let Predicates = [HasFullFP16];
   }
 
-  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
                              [(set FPR32:$Rd,
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f32_i32:$scale))]> {
+                                   (fmul (node GPR32:$Rn),
+                                         fixedpoint_recip_f32_i32:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
     let scale{5} = 1;
   }
 
-  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
                              [(set FPR64:$Rd,
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f64_i32:$scale))]> {
+                                   (fmul (node GPR32:$Rn),
+                                         fixedpoint_recip_f64_i32:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
     let scale{5} = 1;
   }
 
-  def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+  def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
                              [(set (f16 FPR16:$Rd),
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f16_i64:$scale))]> {
+                                   (fmul (node GPR64:$Rn),
+                                         fixedpoint_recip_f16_i64:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let Predicates = [HasFullFP16];
   }
 
-  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
                              [(set FPR32:$Rd,
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f32_i64:$scale))]> {
+                                   (fmul (node GPR64:$Rn),
+                                         fixedpoint_recip_f32_i64:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
 
-  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
                              [(set FPR64:$Rd,
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f64_i64:$scale))]> {
+                                   (fmul (node GPR64:$Rn),
+                                         fixedpoint_recip_f64_i64:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
   }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c5ffd9bcbe0c1d..79f460765bce51 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4348,6 +4348,34 @@ def : Pat<(i64 (any_llround f64:$Rn)),
 defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
 defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
 
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
 //===----------------------------------------------------------------------===//
 // Unscaled integer to floating point conversion instruction.
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
new file mode 100644
index 00000000000000..f78fcea9e3a1f5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple aarch64-none-linux-gnu -enable-unsafe-fp-math -mattr=+fullfp16 < %s | FileCheck %s
+
+define half @scvtf_f16_2(i32 %state) {
+; CHECK-LABEL: scvtf_f16_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf h0, w0, #1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to half
+  %div = fmul half %conv, 5.000000e-01
+  ret half %div
+}
+
+define half @scvtf_f16_4(i32 %state) {
+; CHECK-LABEL: scvtf_f16_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf h0, w0, #2
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to half
+  %div = fmul half %conv, 2.500000e-01
+  ret half %div
+}
+
+define half @scvtf_f16_8(i32 %state) {
+; CHECK-LABEL: scvtf_f16_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf h0, w0, #3
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to half
+  %div = fmul half %conv, 1.250000e-01
+  ret half %div
+}
+
+define half @scvtf_f16_16(i32 %state) {
+; CHECK-LABEL: scvtf_f16_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf h0, w0, #4
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to half
+  %div = fmul half %conv, 6.250000e-02
+  ret half %div
+}
+
+define half @scvtf_f16_32(i32 %state) {
+; CHECK-LABEL: scvtf_f16_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf h0, w0, #5
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to half
+  %div = fmul half %conv, 3.125000e-02
+  ret half %div
+}
+
+define float @scvtf_f32_2(i32 %state) {
+; CHECK-LABEL: scvtf_f32_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 5.000000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_4(i32 %state) {
+; CHECK-LABEL: scvtf_f32_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #2
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 2.500000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_8(i32 %state) {
+; CHECK-LABEL: scvtf_f32_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #3
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 1.250000e-01
+  ret float %div
+}
+
+define float @scvtf_f32_16(i32 %state) {
+; CHECK-LABEL: scvtf_f32_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #4
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 6.250000e-02
+  ret float %div
+}
+
+define float @scvtf_f32_32(i32 %state) {
+; CHECK-LABEL: scvtf_f32_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf s0, w0, #5
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i32 %state to float
+  %div = fmul float %conv, 3.125000e-02
+  ret float %div
+}
+
+define double @scvtf_f64_2(i64 %state) {
+; CHECK-LABEL: scvtf_f64_2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #1
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 5.000000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_4(i64 %state) {
+; CHECK-LABEL: scvtf_f64_4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #2
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 2.500000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_8(i64 %state) {
+; CHECK-LABEL: scvtf_f64_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #3
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 1.250000e-01
+  ret double %div
+}
+
+define double @scvtf_f64_16(i64 %state) {
+; CHECK-LABEL: scvtf_f64_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #4
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 6.250000e-02
+  ret double %div
+}
+
+define double @scvtf_f64_32(i64 %state) {
+; CHECK-LABEL: scvtf_f64_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    scvtf d0, x0, #5
+; CHECK-NEXT:    ret
+entry:
+  %conv = sitofp i64 %state to double
+  %div = fmul double %conv, 3.125000e-02
+  ret double %div
+}