[llvm] 2ac1999 - [AArch64][SVE] Propagate math flags from intrinsics to instructions
Matthew Devereau via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 5 07:39:27 PDT 2021
Author: Matthew Devereau
Date: 2021-10-05T15:39:13+01:00
New Revision: 2ac199993764e068494a69a85af098c0ae1ff37e
URL: https://github.com/llvm/llvm-project/commit/2ac199993764e068494a69a85af098c0ae1ff37e
DIFF: https://github.com/llvm/llvm-project/commit/2ac199993764e068494a69a85af098c0ae1ff37e.diff
LOG: [AArch64][SVE] Propagate math flags from intrinsics to instructions
Retain the floating-point math flags of the original intrinsic call when instCombineSVEVectorBinOp replaces it with an equivalent IR binary operation.
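For example, mirroring the nxv2f64 fmul test updated below, combining

  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)

now produces

  %1 = fmul fast <vscale x 2 x double> %a, %b

instead of a plain fmul, so the fast-math flags on the intrinsic call are no longer dropped by the combine.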
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 101cb15e82a0a..3b965c86aa643 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -710,16 +710,18 @@ static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
                                                         IntrinsicInst &II) {
+  auto *OpPredicate = II.getOperand(0);
  auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
  if (BinOpCode == Instruction::BinaryOpsEnd ||
-      !match(II.getOperand(0),
-             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                 m_ConstantInt<AArch64SVEPredPattern::all>())))
+      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
    return None;
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
-  return IC.replaceInstUsesWith(
-      II, Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2)));
+  Builder.setFastMathFlags(II.getFastMathFlags());
+  auto BinOp =
+      Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+  return IC.replaceInstUsesWith(II, BinOp);
}
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
index 678489f0ceaf7..cac17e6f74032 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -10,7 +10,7 @@ declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_half
-; CHECK-NEXT: %1 = fmul <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -20,7 +20,7 @@ define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_float
-; CHECK-NEXT: %1 = fmul <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -30,7 +30,7 @@ define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_double
-; CHECK-NEXT: %1 = fmul <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -40,7 +40,7 @@ define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double
declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_half
-; CHECK-NEXT: %1 = fadd <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -50,7 +50,7 @@ define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_float
-; CHECK-NEXT: %1 = fadd <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -60,7 +60,7 @@ define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_double
-; CHECK-NEXT: %1 = fadd <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -70,7 +70,7 @@ define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double
declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_half
-; CHECK-NEXT: %1 = fsub <vscale x 8 x half> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -80,7 +80,7 @@ define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a,
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_float
-; CHECK-NEXT: %1 = fsub <vscale x 4 x float> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -91,7 +91,7 @@ define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_double
-; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -108,4 +108,13 @@ define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double>
ret <vscale x 2 x double> %2
}
+define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
+; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+ %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %2
+}
+
attributes #0 = { "target-features"="+sve" }