[llvm] 2ac1999 - [AArch64][SVE] Propagate math flags from intrinsics to instructions
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 5 07:39:27 PDT 2021
Author: Matthew Devereau
Date: 2021-10-05T15:39:13+01:00
New Revision: 2ac199993764e068494a69a85af098c0ae1ff37e
URL: https://github.com/llvm/llvm-project/commit/2ac199993764e068494a69a85af098c0ae1ff37e
DIFF: https://github.com/llvm/llvm-project/commit/2ac199993764e068494a69a85af098c0ae1ff37e.diff
LOG: [AArch64][SVE] Propagate math flags from intrinsics to instructions
Retain the floating-point math flags of the original intrinsic call when instCombineSVEVectorBinOp replaces it with an equivalent IR binary operation.
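For example, mirroring the nxv2f64 fmul test updated below, combining

  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)

now produces

  %1 = fmul fast <vscale x 2 x double> %a, %b

instead of a plain fmul, so the fast-math flags on the intrinsic call are no longer dropped by the combine.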
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 101cb15e82a0a..3b965c86aa643 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -710,16 +710,18 @@ static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
                                                         IntrinsicInst &II) {
+  auto *OpPredicate = II.getOperand(0);
  auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
  if (BinOpCode == Instruction::BinaryOpsEnd ||
-      !match(II.getOperand(0),
-             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                 m_ConstantInt<AArch64SVEPredPattern::all>())))
+      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return None;
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
-  return IC.replaceInstUsesWith(
-      II, Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2)));
+  Builder.setFastMathFlags(II.getFastMathFlags());
+  auto BinOp =
+      Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+  return IC.replaceInstUsesWith(II, BinOp);
}
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
index 678489f0ceaf7..cac17e6f74032 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -10,7 +10,7 @@ declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_half
-; CHECK-NEXT: %1 = fmul <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -20,7 +20,7 @@ define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_float
-; CHECK-NEXT: %1 = fmul <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -30,7 +30,7 @@ define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_double
-; CHECK-NEXT: %1 = fmul <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -40,7 +40,7 @@ define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double
declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_half
-; CHECK-NEXT: %1 = fadd <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -50,7 +50,7 @@ define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_float
-; CHECK-NEXT: %1 = fadd <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -60,7 +60,7 @@ define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_double
-; CHECK-NEXT: %1 = fadd <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -70,7 +70,7 @@ define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double
declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_half
-; CHECK-NEXT: %1 = fsub <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -80,7 +80,7 @@ define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_float
-; CHECK-NEXT: %1 = fsub <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -91,7 +91,7 @@ define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_double
-; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -108,4 +108,13 @@ define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double>
ret <vscale x 2 x double> %2
}
+define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
+; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+ %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %2
+}
+
attributes #0 = { "target-features"="+sve" }