[llvm] [X86] Remove `UnsafeFPMath` uses (PR #151667)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 22:15:12 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/151667
From 5991c77b59df756c02974aa3a65c29a2fcf6188f Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Fri, 1 Aug 2025 17:36:00 +0800
Subject: [PATCH] [X86] Remove `UnsafeFPMath` uses
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 37 ++++--
.../X86/avx512fp16-combine-shuffle-fma.ll | 2 +-
llvm/test/CodeGen/X86/fmaddsub-combine.ll | 106 +++++++++---------
llvm/test/CodeGen/X86/fmsubadd-combine.ll | 30 +++--
4 files changed, 92 insertions(+), 83 deletions(-)
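In short: the MUL + ADDSUB -> FMADDSUB (and MUL + SUBADD -> FMSUBADD) fusion is now gated on the per-node `contract` fast-math flag (or a global FPOpFusion::Fast setting) instead of the global `Options.UnsafeFPMath` flag, and the tests drop the "unsafe-fp-math"="true" function attribute in favor of `contract` on the individual FP instructions. A minimal IR sketch of the shape the updated tests rely on, adapted from mul_addsub_pd128 below (the function name is illustrative):

define <2 x double> @addsub_contract_sketch(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  ; 'contract' on the multiply and on both the sub and the add is what the
  ; new flag-based check keys on, rather than a function-level attribute.
  %ab  = fmul contract <2 x double> %a, %b
  %sub = fsub contract <2 x double> %ab, %c
  %add = fadd contract <2 x double> %ab, %c
  ; Even lane from the sub, odd lane from the add.
  %addsub = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %addsub
}

On an FMA-capable subtarget this is expected to lower to a single vfmaddsubpd, matching the FMA3/FMA4 CHECK lines in the updated tests.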
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbb1d9057a72..2419df95ed935 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8279,8 +8279,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
- unsigned &NumExtracts,
- bool &IsSubAdd) {
+ unsigned &NumExtracts, bool &IsSubAdd,
+ bool &AllowContract) {
using namespace SDPatternMatch;
MVT VT = BV->getSimpleValueType(0);
@@ -8292,6 +8292,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
SDValue InVec1 = DAG.getUNDEF(VT);
NumExtracts = 0;
+ AllowContract = NumElts != 0;
// Odd-numbered elements in the input build vector are obtained from
// adding/subtracting two integer/float elements.
@@ -8350,6 +8351,9 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// Increment the number of extractions done.
++NumExtracts;
+
+ AllowContract &= Op0->getFlags().hasAllowContract() &&
+ Op1->getFlags().hasAllowContract();
}
// Ensure we have found an opcode for both parities and that they are
@@ -8393,9 +8397,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
- SelectionDAG &DAG,
- SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
- unsigned ExpectedUses) {
+ SelectionDAG &DAG, SDValue &Opnd0,
+ SDValue &Opnd1, SDValue &Opnd2,
+ unsigned ExpectedUses,
+ bool AllowSubAddOrAddSubContract) {
if (Opnd0.getOpcode() != ISD::FMUL ||
!Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
return false;
@@ -8406,7 +8411,8 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+ Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ (AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
if (!AllowFusion)
return false;
@@ -8427,15 +8433,17 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
SDValue Opnd0, Opnd1;
unsigned NumExtracts;
bool IsSubAdd;
- if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
- IsSubAdd))
+ bool AllowContract;
+ if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd,
+ AllowContract))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts,
+ AllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
@@ -43165,7 +43173,7 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
- bool &IsSubAdd) {
+ bool &IsSubAdd, bool &AllowContract) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -43216,6 +43224,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
// It's a subadd if the vector in the even parity is an FADD.
IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
: V2->getOpcode() == ISD::FADD;
+ AllowContract =
+ V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();
Opnd0 = LHS;
Opnd1 = RHS;
@@ -43273,14 +43283,17 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,
SDValue Opnd0, Opnd1;
bool IsSubAdd;
- if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
+ bool AllowContract;
+ if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd,
+ AllowContract))
return SDValue();
MVT VT = N->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
- if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+ if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2,
+ AllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
index f02d11648362c..6d22f669725a2 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
-define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind {
+define <2 x half> @foo(<2 x half> %0) nounwind {
; AVX2-LABEL: foo:
; AVX2: # %bb.0:
; AVX2-NEXT: subq $40, %rsp
diff --git a/llvm/test/CodeGen/X86/fmaddsub-combine.ll b/llvm/test/CodeGen/X86/fmaddsub-combine.ll
index 5219ab3fab944..95f5bfe6f968e 100644
--- a/llvm/test/CodeGen/X86/fmaddsub-combine.ll
+++ b/llvm/test/CodeGen/X86/fmaddsub-combine.ll
@@ -6,7 +6,7 @@
; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
-define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
+define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@@ -23,14 +23,14 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x do
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <2 x double> %A, %B
- %Sub = fsub <2 x double> %AB, %C
- %Add = fadd <2 x double> %AB, %C
+ %AB = fmul contract <2 x double> %A, %B
+ %Sub = fsub contract <2 x double> %AB, %C
+ %Add = fadd contract <2 x double> %AB, %C
%Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
ret <2 x double> %Addsub
}
-define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
+define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -47,14 +47,14 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <4 x float> %A, %B
- %Sub = fsub <4 x float> %AB, %C
- %Add = fadd <4 x float> %AB, %C
+ %AB = fmul contract <4 x float> %A, %B
+ %Sub = fsub contract <4 x float> %AB, %C
+ %Add = fadd contract <4 x float> %AB, %C
%Addsub = shufflevector <4 x float> %Sub, <4 x float> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %Addsub
}
-define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
+define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@@ -71,14 +71,14 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <4 x double> %A, %B
- %Sub = fsub <4 x double> %AB, %C
- %Add = fadd <4 x double> %AB, %C
+ %AB = fmul contract <4 x double> %A, %B
+ %Sub = fsub contract <4 x double> %AB, %C
+ %Add = fadd contract <4 x double> %AB, %C
%Addsub = shufflevector <4 x double> %Sub, <4 x double> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %Addsub
}
-define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
+define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -95,14 +95,14 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <8 x float> %A, %B
- %Sub = fsub <8 x float> %AB, %C
- %Add = fadd <8 x float> %AB, %C
+ %AB = fmul contract <8 x float> %A, %B
+ %Sub = fsub contract <8 x float> %AB, %C
+ %Add = fadd contract <8 x float> %AB, %C
%Addsub = shufflevector <8 x float> %Sub, <8 x float> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %Addsub
}
-define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
+define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@@ -128,14 +128,14 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
- %AB = fmul <8 x double> %A, %B
- %Sub = fsub <8 x double> %AB, %C
- %Add = fadd <8 x double> %AB, %C
+ %AB = fmul contract <8 x double> %A, %B
+ %Sub = fsub contract <8 x double> %AB, %C
+ %Add = fadd contract <8 x double> %AB, %C
%Addsub = shufflevector <8 x double> %Sub, <8 x double> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x double> %Addsub
}
-define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
+define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@@ -161,14 +161,14 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
- %AB = fmul <16 x float> %A, %B
- %Sub = fsub <16 x float> %AB, %C
- %Add = fadd <16 x float> %AB, %C
+ %AB = fmul contract <16 x float> %A, %B
+ %Sub = fsub contract <16 x float> %AB, %C
+ %Add = fadd contract <16 x float> %AB, %C
%Addsub = shufflevector <16 x float> %Sub, <16 x float> %Add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
ret <16 x float> %Addsub
}
-define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
+define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -185,19 +185,19 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
- %A = fmul <4 x float> %C, %D
+ %A = fmul contract <4 x float> %C, %D
%A0 = extractelement <4 x float> %A, i32 0
%B0 = extractelement <4 x float> %B, i32 0
- %sub0 = fsub float %A0, %B0
+ %sub0 = fsub contract float %A0, %B0
%A2 = extractelement <4 x float> %A, i32 2
%B2 = extractelement <4 x float> %B, i32 2
- %sub2 = fsub float %A2, %B2
+ %sub2 = fsub contract float %A2, %B2
%A1 = extractelement <4 x float> %A, i32 1
%B1 = extractelement <4 x float> %B, i32 1
- %add1 = fadd float %A1, %B1
+ %add1 = fadd contract float %A1, %B1
%A3 = extractelement <4 x float> %A, i32 3
%B3 = extractelement <4 x float> %B, i32 3
- %add3 = fadd float %A3, %B3
+ %add3 = fadd contract float %A3, %B3
%vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2
@@ -205,7 +205,7 @@ bb:
ret <4 x float> %vecinsert4
}
-define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
+define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@@ -222,7 +222,7 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
- %A = fmul <2 x double> %C, %D
+ %A = fmul contract <2 x double> %C, %D
%A0 = extractelement <2 x double> %A, i32 0
%B0 = extractelement <2 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
@@ -234,7 +234,7 @@ bb:
ret <2 x double> %vecinsert2
}
-define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
+define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -251,7 +251,7 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
- %A = fmul <8 x float> %C, %D
+ %A = fmul contract <8 x float> %C, %D
%A0 = extractelement <8 x float> %A, i32 0
%B0 = extractelement <8 x float> %B, i32 0
%sub0 = fsub float %A0, %B0
@@ -287,7 +287,7 @@ bb:
ret <8 x float> %vecinsert8
}
-define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
+define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@@ -304,7 +304,7 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
- %A = fmul <4 x double> %C, %D
+ %A = fmul contract <4 x double> %C, %D
%A0 = extractelement <4 x double> %A, i32 0
%B0 = extractelement <4 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
@@ -324,7 +324,7 @@ bb:
ret <4 x double> %vecinsert4
}
-define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@@ -350,7 +350,7 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
- %A = fmul <16 x float> %C, %D
+ %A = fmul contract <16 x float> %C, %D
%A0 = extractelement <16 x float> %A, i32 0
%B0 = extractelement <16 x float> %B, i32 0
%sub0 = fsub float %A0, %B0
@@ -418,7 +418,7 @@ bb:
ret <16 x float> %vecinsert16
}
-define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@@ -444,7 +444,7 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
- %A = fmul <8 x double> %C, %D
+ %A = fmul contract <8 x double> %C, %D
%A0 = extractelement <8 x double> %A, i32 0
%B0 = extractelement <8 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
@@ -477,7 +477,7 @@ bb:
ret <8 x double> %vecinsert8
}
-define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
+define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -506,7 +506,7 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
- %A = fmul <4 x float> %C, %D
+ %A = fmul contract <4 x float> %C, %D
%A0 = extractelement <4 x float> %A, i32 0
%B0 = extractelement <4 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
@@ -526,7 +526,7 @@ bb:
ret <4 x float> %vecinsert4
}
-define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
+define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@@ -547,7 +547,7 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
- %A = fmul <2 x double> %C, %D
+ %A = fmul contract <2 x double> %C, %D
%A0 = extractelement <2 x double> %A, i32 0
%B0 = extractelement <2 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
@@ -559,7 +559,7 @@ bb:
ret <2 x double> %vecinsert2
}
-define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
+define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -604,7 +604,7 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
- %A = fmul <8 x float> %C, %D
+ %A = fmul contract <8 x float> %C, %D
%A0 = extractelement <8 x float> %A, i32 0
%B0 = extractelement <8 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
@@ -640,7 +640,7 @@ bb:
ret <8 x float> %vecinsert8
}
-define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
+define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@@ -669,7 +669,7 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
- %A = fmul <4 x double> %C, %D
+ %A = fmul contract <4 x double> %C, %D
%A0 = extractelement <4 x double> %A, i32 0
%B0 = extractelement <4 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
@@ -689,7 +689,7 @@ bb:
ret <4 x double> %vecinsert4
}
-define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
+define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@@ -765,7 +765,7 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
- %A = fmul <16 x float> %C, %D
+ %A = fmul contract <16 x float> %C, %D
%A0 = extractelement <16 x float> %A, i32 0
%B0 = extractelement <16 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
@@ -833,7 +833,7 @@ bb:
ret <16 x float> %vecinsert16
}
-define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
+define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@@ -879,7 +879,7 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
- %A = fmul <8 x double> %C, %D
+ %A = fmul contract <8 x double> %C, %D
%A0 = extractelement <8 x double> %A, i32 0
%B0 = extractelement <8 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
@@ -911,5 +911,3 @@ bb:
%vecinsert8 = insertelement <8 x double> %vecinsert7, double %add7, i32 7
ret <8 x double> %vecinsert8
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/fmsubadd-combine.ll b/llvm/test/CodeGen/X86/fmsubadd-combine.ll
index 674a1d5ad779b..c2f3113cf94ac 100644
--- a/llvm/test/CodeGen/X86/fmsubadd-combine.ll
+++ b/llvm/test/CodeGen/X86/fmsubadd-combine.ll
@@ -6,7 +6,7 @@
; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
-define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
+define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@@ -25,14 +25,14 @@ define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <2 x double> %A, %B
+ %AB = fmul contract <2 x double> %A, %B
%Sub = fsub <2 x double> %AB, %C
%Add = fadd <2 x double> %AB, %C
%subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
ret <2 x double> %subadd
}
-define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
+define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -51,14 +51,14 @@ define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <4 x float> %A, %B
+ %AB = fmul contract <4 x float> %A, %B
%Sub = fsub <4 x float> %AB, %C
%Add = fadd <4 x float> %AB, %C
%subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %subadd
}
-define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
+define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@@ -77,14 +77,14 @@ define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <4 x double> %A, %B
+ %AB = fmul contract <4 x double> %A, %B
%Sub = fsub <4 x double> %AB, %C
%Add = fadd <4 x double> %AB, %C
%subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %subadd
}
-define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
+define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -103,14 +103,14 @@ define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
entry:
- %AB = fmul <8 x float> %A, %B
+ %AB = fmul contract <8 x float> %A, %B
%Sub = fsub <8 x float> %AB, %C
%Add = fadd <8 x float> %AB, %C
%subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %subadd
}
-define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
+define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
@@ -140,14 +140,14 @@ define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
entry:
- %AB = fmul <8 x double> %A, %B
+ %AB = fmul contract <8 x double> %A, %B
%Sub = fsub <8 x double> %AB, %C
%Add = fadd <8 x double> %AB, %C
%subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x double> %subadd
}
-define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
+define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
@@ -177,7 +177,7 @@ define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
entry:
- %AB = fmul <16 x float> %A, %B
+ %AB = fmul contract <16 x float> %A, %B
%Sub = fsub <16 x float> %AB, %C
%Add = fadd <16 x float> %AB, %C
%subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
@@ -185,7 +185,7 @@ entry:
}
; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
-define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
+define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: mul_subadd_bad_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@@ -194,11 +194,9 @@ define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; CHECK-NEXT: retq
entry:
- %AB = fmul <2 x double> %A, %B
+ %AB = fmul contract <2 x double> %A, %B
%Sub = fsub <2 x double> %C, %AB
%Add = fadd <2 x double> %AB, %C
%subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
ret <2 x double> %subadd
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
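The build-vector path (the first isAddSubOrSubAdd overload in the X86ISelLowering.cpp hunks above) handles the same fusion when the add/sub is done on extracted scalars and reinserted; with the new gating, the vector multiply and each scalar add/sub need `contract` unless contraction is enabled globally. A sketch of that shape, adapted from buildvector_mul_addsub_ps128 in fmaddsub-combine.ll above (names illustrative):

define <4 x float> @buildvector_addsub_sketch(<4 x float> %c, <4 x float> %d, <4 x float> %b) {
bb:
  %a = fmul contract <4 x float> %c, %d
  ; Lanes 0 and 2 subtract, lanes 1 and 3 add; every scalar op carries 'contract'.
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %sub0 = fsub contract float %a0, %b0
  %a1 = extractelement <4 x float> %a, i32 1
  %b1 = extractelement <4 x float> %b, i32 1
  %add1 = fadd contract float %a1, %b1
  %a2 = extractelement <4 x float> %a, i32 2
  %b2 = extractelement <4 x float> %b, i32 2
  %sub2 = fsub contract float %a2, %b2
  %a3 = extractelement <4 x float> %a, i32 3
  %b3 = extractelement <4 x float> %b, i32 3
  %add3 = fadd contract float %a3, %b3
  ; Reassemble into a vector; the build-vector combine can then form a single
  ; FMADDSUB node when an FMA unit is available.
  %v0 = insertelement <4 x float> undef, float %sub0, i32 0
  %v1 = insertelement <4 x float> %v0, float %add1, i32 1
  %v2 = insertelement <4 x float> %v1, float %sub2, i32 2
  %v3 = insertelement <4 x float> %v2, float %add3, i32 3
  ret <4 x float> %v3
}

On an FMA-capable subtarget this should again become a single vfmaddsubps, per the FMA/FMA4 CHECK lines of buildvector_mul_addsub_ps128.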