[llvm] 9f8dcb0 - [AArch64] Try to detect patterns with fdiv and fmul for [su]cvtf.
Jingu Kang via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 15 02:59:01 PDT 2023
Author: Jingu Kang
Date: 2023-08-15T10:57:07+01:00
New Revision: 9f8dcb070655b4914d47848dcfbba742f12b25bd
URL: https://github.com/llvm/llvm-project/commit/9f8dcb070655b4914d47848dcfbba742f12b25bd
DIFF: https://github.com/llvm/llvm-project/commit/9f8dcb070655b4914d47848dcfbba742f12b25bd.diff
LOG: [AArch64] Try to detect patterns with fdiv and fmul for [su]cvtf.
If fmul's constant operand is the reciprocal of a power of 2 (i.e. 1/2^n) or
fdiv's constant operand is a power of 2, we can try to match patterns with
[su]int_to_fp for [su]cvtf.
Differential Revision: https://reviews.llvm.org/D156538
Added:
llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c01b0eaff94b9a..479067d2fb6a40 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -462,6 +462,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
+ }
+
+ bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
+ unsigned Width);
+
bool SelectCMP_SWAP(SDNode *N);
bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
@@ -3625,9 +3633,10 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
return true;
}
-bool
-AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth) {
+static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
+ SDValue &FixedPos,
+ unsigned RegWidth,
+ bool isReciprocal) {
APFloat FVal(0.0);
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
FVal = CN->getValueAPF();
@@ -3652,13 +3661,18 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// integers.
bool IsExact;
+ if (isReciprocal)
+ if (!FVal.getExactInverse(&FVal))
+ return false;
+
// fbits is between 1 and 64 in the worst-case, which means the fmul
// could have 2^64 as an actual operand. Need 65 bits of precision.
APSInt IntVal(65, true);
FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
// N.b. isPowerOf2 also checks for > 0.
- if (!IsExact || !IntVal.isPowerOf2()) return false;
+ if (!IsExact || !IntVal.isPowerOf2())
+ return false;
unsigned FBits = IntVal.logBase2();
// Checks above should have guaranteed that we haven't lost information in
@@ -3669,6 +3683,19 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
return true;
}
+bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+ false);
+}
+
+bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
+ SDValue &FixedPos,
+ unsigned RegWidth) {
+ return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
+ true);
+}
+
// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
// of the string and obtains the integer values from them and combines these
// into a single value to be used in the MRS/MSR instruction.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index ac7b208e3f075c..9eccfedd9b6529 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -703,6 +703,28 @@ def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
+class fixedpoint_recip_i32<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<32>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm32";
+}
+
+class fixedpoint_recip_i64<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperand<64>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm64";
+}
+
+def fixedpoint_recip_f16_i32 : fixedpoint_recip_i32<f16>;
+def fixedpoint_recip_f32_i32 : fixedpoint_recip_i32<f32>;
+def fixedpoint_recip_f64_i32 : fixedpoint_recip_i32<f64>;
+
+def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
+def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
+def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
+
def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
@@ -4984,55 +5006,55 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
}
// Scaled
- def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+ def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
[(set (f16 FPR16:$Rd),
- (fdiv (node GPR32:$Rn),
- fixedpoint_f16_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f16_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let scale{5} = 1;
let Predicates = [HasFullFP16];
}
- def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+ def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
[(set FPR32:$Rd,
- (fdiv (node GPR32:$Rn),
- fixedpoint_f32_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f32_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
let scale{5} = 1;
}
- def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+ def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
[(set FPR64:$Rd,
- (fdiv (node GPR32:$Rn),
- fixedpoint_f64_i32:$scale))]> {
+ (fmul (node GPR32:$Rn),
+ fixedpoint_recip_f64_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
let scale{5} = 1;
}
- def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+ def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
[(set (f16 FPR16:$Rd),
- (fdiv (node GPR64:$Rn),
- fixedpoint_f16_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f16_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b11; // 16-bit FPR flag
let Predicates = [HasFullFP16];
}
- def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+ def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
[(set FPR32:$Rd,
- (fdiv (node GPR64:$Rn),
- fixedpoint_f32_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b00; // 32-bit FPR flag
}
- def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+ def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
[(set FPR64:$Rd,
- (fdiv (node GPR64:$Rn),
- fixedpoint_f64_i64:$scale))]> {
+ (fmul (node GPR64:$Rn),
+ fixedpoint_recip_f64_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
let Inst{23-22} = 0b01; // 64-bit FPR flag
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index c5ffd9bcbe0c1d..79f460765bce51 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4348,6 +4348,34 @@ def : Pat<(i64 (any_llround f64:$Rn)),
defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+ (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+ (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+ (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+ (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+ (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+ (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
+ (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
+ (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
+ (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
+
+def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
+ (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
+def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
+ (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
+def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
+ (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
+
//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
new file mode 100644
index 00000000000000..f78fcea9e3a1f5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/svtcf-fmul-fdiv-combine.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple aarch64-none-linux-gnu -enable-unsafe-fp-math -mattr=+fullfp16 < %s | FileCheck %s
+
+define half @scvtf_f16_2(i32 %state) {
+; CHECK-LABEL: scvtf_f16_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf h0, w0, #1
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to half
+ %div = fmul half %conv, 5.000000e-01
+ ret half %div
+}
+
+define half @scvtf_f16_4(i32 %state) {
+; CHECK-LABEL: scvtf_f16_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf h0, w0, #2
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to half
+ %div = fmul half %conv, 2.500000e-01
+ ret half %div
+}
+
+define half @scvtf_f16_8(i32 %state) {
+; CHECK-LABEL: scvtf_f16_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf h0, w0, #3
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to half
+ %div = fmul half %conv, 1.250000e-01
+ ret half %div
+}
+
+define half @scvtf_f16_16(i32 %state) {
+; CHECK-LABEL: scvtf_f16_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf h0, w0, #4
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to half
+ %div = fmul half %conv, 6.250000e-02
+ ret half %div
+}
+
+define half @scvtf_f16_32(i32 %state) {
+; CHECK-LABEL: scvtf_f16_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf h0, w0, #5
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to half
+ %div = fmul half %conv, 3.125000e-02
+ ret half %div
+}
+
+define float @scvtf_f32_2(i32 %state) {
+; CHECK-LABEL: scvtf_f32_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #1
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to float
+ %div = fmul float %conv, 5.000000e-01
+ ret float %div
+}
+
+define float @scvtf_f32_4(i32 %state) {
+; CHECK-LABEL: scvtf_f32_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #2
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to float
+ %div = fmul float %conv, 2.500000e-01
+ ret float %div
+}
+
+define float @scvtf_f32_8(i32 %state) {
+; CHECK-LABEL: scvtf_f32_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #3
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to float
+ %div = fmul float %conv, 1.250000e-01
+ ret float %div
+}
+
+define float @scvtf_f32_16(i32 %state) {
+; CHECK-LABEL: scvtf_f32_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #4
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to float
+ %div = fmul float %conv, 6.250000e-02
+ ret float %div
+}
+
+define float @scvtf_f32_32(i32 %state) {
+; CHECK-LABEL: scvtf_f32_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf s0, w0, #5
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i32 %state to float
+ %div = fmul float %conv, 3.125000e-02
+ ret float %div
+}
+
+define double @scvtf_f64_2(i64 %state) {
+; CHECK-LABEL: scvtf_f64_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #1
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i64 %state to double
+ %div = fmul double %conv, 5.000000e-01
+ ret double %div
+}
+
+define double @scvtf_f64_4(i64 %state) {
+; CHECK-LABEL: scvtf_f64_4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #2
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i64 %state to double
+ %div = fmul double %conv, 2.500000e-01
+ ret double %div
+}
+
+define double @scvtf_f64_8(i64 %state) {
+; CHECK-LABEL: scvtf_f64_8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #3
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i64 %state to double
+ %div = fmul double %conv, 1.250000e-01
+ ret double %div
+}
+
+define double @scvtf_f64_16(i64 %state) {
+; CHECK-LABEL: scvtf_f64_16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #4
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i64 %state to double
+ %div = fmul double %conv, 6.250000e-02
+ ret double %div
+}
+
+define double @scvtf_f64_32(i64 %state) {
+; CHECK-LABEL: scvtf_f64_32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf d0, x0, #5
+; CHECK-NEXT: ret
+entry:
+ %conv = sitofp i64 %state to double
+ %div = fmul double %conv, 3.125000e-02
+ ret double %div
+}
More information about the llvm-commits
mailing list