[llvm] [AArch64] Remove `UnsafeFPMath` (PR #150876)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 28 02:53:26 PDT 2025
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/150876
>From b15d1909955589d298e0f5ec19dc64385cb63fdf Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Sat, 26 Jul 2025 18:00:16 +0800
Subject: [PATCH 1/3] [AArch64] Remove `UnsafeFPMath`
---
.../lib/Target/AArch64/AArch64ISelLowering.cpp | 18 ++++++++----------
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 9 +++------
3 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7b49754ee7e1f..4f7bd79773f14 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11325,7 +11325,7 @@ static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal,
SDValue AArch64TargetLowering::LowerSELECT_CC(
ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal,
- iterator_range<SDNode::user_iterator> Users, bool HasNoNaNs,
+ iterator_range<SDNode::user_iterator> Users, SDNodeFlags Flags,
const SDLoc &DL, SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
@@ -11523,7 +11523,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
return true;
}
})) {
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || HasNoNaNs;
+ bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Flags.hasNoNaNs();
SDValue VectorCmp =
emitFloatCompareMask(LHS, RHS, TVal, FVal, CC, NoNaNs, DL, DAG);
if (VectorCmp)
@@ -11537,7 +11537,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
- if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (Flags.hasNoSignedZeros()) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
@@ -11616,10 +11616,9 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
- bool HasNoNans = Op->getFlags().hasNoNaNs();
+ SDNodeFlags Flags = Op->getFlags();
SDLoc DL(Op);
- return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL,
- DAG);
+ return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), Flags, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
@@ -11627,7 +11626,6 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
- bool HasNoNans = Op->getFlags().hasNoNaNs();
SDLoc DL(Op);
EVT Ty = Op.getValueType();
@@ -11694,8 +11692,8 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
DAG.getUNDEF(MVT::f32), FVal);
}
- SDValue Res =
- LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL, DAG);
+ SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(),
+ Op->getFlags(), DL, DAG);
if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
@@ -16674,7 +16672,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
- Options.UnsafeFPMath));
+ I->getFastMathFlags().allowContract()));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 95d0e3be70d16..ea63edd86210e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -662,7 +662,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal,
iterator_range<SDNode::user_iterator> Users,
- bool HasNoNans, const SDLoc &dl,
+ SDNodeFlags Flags, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8685d7a04ac9c..59d4fd26f6f91 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -6574,10 +6574,8 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
// the target options or if FADD/FSUB has the contract fast-math flag.
- return Options.UnsafeFPMath ||
- Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ return Options.AllowFPOpFusion == FPOpFusion::Fast ||
Inst.getFlag(MachineInstr::FmContract);
- return true;
}
return false;
}
@@ -6680,9 +6678,8 @@ bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
case AArch64::FMUL_ZZZ_H:
case AArch64::FMUL_ZZZ_S:
case AArch64::FMUL_ZZZ_D:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
- (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
- Inst.getFlag(MachineInstr::MIFlag::FmNsz));
+ return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
+ Inst.getFlag(MachineInstr::MIFlag::FmNsz);
// == Integer types ==
// -- Base instructions --
>From dbb0443eccc5970cf65f7dc7409d24537b863e05 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 28 Jul 2025 16:57:26 +0800
Subject: [PATCH 2/3] fix tests except sqrt related
---
.../AArch64/aarch64-combine-fmul-fsub.mir | 58 +-
.../CodeGen/AArch64/arm64-fml-combines.ll | 20 +-
llvm/test/CodeGen/AArch64/fcsel-zero.ll | 35 +-
.../AArch64/machine-combiner-reassociate.mir | 22 +-
llvm/test/CodeGen/AArch64/machine-combiner.ll | 887 +++++++++++-------
.../test/CodeGen/AArch64/machine-combiner.mir | 6 +-
.../CodeGen/AArch64/sched-past-vector-ldst.ll | 18 +-
7 files changed, 599 insertions(+), 447 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
index cf4f321ebae18..491d693ec527a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
@@ -1,8 +1,8 @@
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
#
name: f1_2s
registers:
@@ -16,18 +16,18 @@ body: |
%2:fpr64 = COPY $d2
%1:fpr64 = COPY $d1
%0:fpr64 = COPY $d0
- %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr
+ %3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr
$d0 = COPY %4
RET_ReallyLR implicit $d0
...
# UNPROFITABLE-LABEL: name: f1_2s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_4s
@@ -42,18 +42,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_4s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_4s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2
# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_2d
@@ -68,18 +68,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_2d
-# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2d
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2
# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_both_fmul_2s
@@ -97,15 +97,15 @@ body: |
%2:fpr64 = COPY $q2
%1:fpr64 = COPY $q1
%0:fpr64 = COPY $q0
- %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr
- %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr
+ %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr
+ %6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2s
-# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_4s
@@ -123,15 +123,15 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_4s
-# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_2d
@@ -149,14 +149,14 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2d
-# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr
diff --git a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
index ce3581030646d..60c48bfce3f85 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s
define void @foo_2d(ptr %src) {
@@ -130,9 +130,9 @@ for.end: ; preds = %for.body
; CHECK: fnmadd h0, h0, h1, h2
define half @test0(half %a, half %b, half %c) {
entry:
- %0 = fmul half %a, %b
- %mul = fsub half -0.000000e+00, %0
- %sub1 = fsub half %mul, %c
+ %0 = fmul contract half %a, %b
+ %mul = fsub contract half -0.000000e+00, %0
+ %sub1 = fsub contract half %mul, %c
ret half %sub1
}
@@ -140,9 +140,9 @@ entry:
; CHECK: fnmadd s0, s0, s1, s2
define float @test1(float %a, float %b, float %c) {
entry:
- %0 = fmul float %a, %b
- %mul = fsub float -0.000000e+00, %0
- %sub1 = fsub float %mul, %c
+ %0 = fmul contract float %a, %b
+ %mul = fsub contract float -0.000000e+00, %0
+ %sub1 = fsub contract float %mul, %c
ret float %sub1
}
@@ -150,9 +150,9 @@ entry:
; CHECK: fnmadd d0, d0, d1, d2
define double @test2(double %a, double %b, double %c) {
entry:
- %0 = fmul double %a, %b
- %mul = fsub double -0.000000e+00, %0
- %sub1 = fsub double %mul, %c
+ %0 = fmul contract double %a, %b
+ %mul = fsub contract double -0.000000e+00, %0
+ %sub1 = fsub contract double %mul, %c
ret double %sub1
}
diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
index 3fbcd106d08ab..3db588b6c0276 100644
--- a/llvm/test/CodeGen/AArch64/fcsel-zero.ll
+++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s
-define float @foeq(float %a, float %b) #0 {
- %t = fcmp oeq float %a, 0.0
+define float @foeq(float %a, float %b) {
+ %t = fcmp nsz oeq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: foeq
@@ -11,8 +11,8 @@ define float @foeq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
}
-define float @fueq(float %a, float %b) #0 {
- %t = fcmp ueq float %a, 0.0
+define float @fueq(float %a, float %b) {
+ %t = fcmp nsz ueq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: fueq
@@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs
}
-define float @fone(float %a, float %b) #0 {
- %t = fcmp one float %a, 0.0
+define float @fone(float %a, float %b) {
+ %t = fcmp nsz one float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fone
@@ -31,8 +31,8 @@ define float @fone(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt
}
-define float @fune(float %a, float %b) #0 {
- %t = fcmp une float %a, 0.0
+define float @fune(float %a, float %b) {
+ %t = fcmp nsz une float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fune
@@ -40,8 +40,8 @@ define float @fune(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne
}
-define double @doeq(double %a, double %b) #0 {
- %t = fcmp oeq double %a, 0.0
+define double @doeq(double %a, double %b) {
+ %t = fcmp nsz oeq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: doeq
@@ -49,8 +49,8 @@ define double @doeq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
}
-define double @dueq(double %a, double %b) #0 {
- %t = fcmp ueq double %a, 0.0
+define double @dueq(double %a, double %b) {
+ %t = fcmp nsz ueq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: dueq
@@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs
}
-define double @done(double %a, double %b) #0 {
- %t = fcmp one double %a, 0.0
+define double @done(double %a, double %b) {
+ %t = fcmp nsz one double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: done
@@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt
}
-define double @dune(double %a, double %b) #0 {
- %t = fcmp une double %a, 0.0
+define double @dune(double %a, double %b) {
+ %t = fcmp nsz une double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: dune
; CHECK: fcmp [[R:d[0-9]+]], #0.0
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
-
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
index 525f6dd05c6c6..184c9ef7a6ea8 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
@@ -1,14 +1,11 @@
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s
# fadd without the reassoc flags can be reassociate only when unsafe fp math is
# enabled.
# CHECK-LABEL: name: fadd_no_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
---
name: fadd_no_reassoc
alignment: 4
@@ -49,10 +46,9 @@ body: |
# the reassoc flag is ignored.
# CHECK-LABEL: name: fadd_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr
+
---
name: fadd_reassoc
alignment: 4
@@ -92,10 +88,8 @@ body: |
# Check that flags on the instructions are preserved after reassociation.
# CHECK-LABEL: name: fadd_flags
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = nnan ninf nsz FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nsz FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = nsz FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr
---
name: fadd_flags
alignment: 4
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index ec61fee1039ad..65afd9276c787 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -1,29 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
-; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 \
+; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -44,110 +36,110 @@ define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3)
}
define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds1_reassoc:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: ret
- %t0 = fadd reassoc float %x0, %x1
- %t1 = fadd reassoc float %t0, %x2
- %t2 = fadd reassoc float %t1, %x3
+; CHECK-LABEL: reassociate_adds1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds2:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds2:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
ret float %t2
}
+define float @reassociate_adds2_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %t1, %x3
+ ret float %t2
+}
+
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds3:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds3:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds3_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds4:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds4:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds4_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-STD-LABEL: reassociate_adds5:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: fadd s0, s0, s4
-; CHECK-STD-NEXT: fadd s0, s0, s5
-; CHECK-STD-NEXT: fadd s0, s0, s6
-; CHECK-STD-NEXT: fadd s0, s0, s7
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds5:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s4, s5
-; CHECK-UNSAFE-NEXT: fadd s1, s1, s6
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s7
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: fadd s0, s0, s4
+; CHECK-NEXT: fadd s0, s0, s5
+; CHECK-NEXT: fadd s0, s0, s6
+; CHECK-NEXT: fadd s0, s0, s7
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -158,141 +150,198 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
ret float %t6
}
+define float @reassociate_adds5_reassoc(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s4, s5
+; CHECK-NEXT: fadd s1, s1, s6
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s7
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
+ %t3 = fadd reassoc nsz float %t2, %x4
+ %t4 = fadd reassoc nsz float %t3, %x5
+ %t5 = fadd reassoc nsz float %t4, %x6
+ %t6 = fadd reassoc nsz float %t5, %x7
+ ret float %t6
+}
+
; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds6:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds6:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds6_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds6_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that scalar single-precision multiplies are reassociated.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_muls1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fmul s0, s2, s0
-; CHECK-STD-NEXT: fmul s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fmul s1, s3, s2
-; CHECK-UNSAFE-NEXT: fmul s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fmul s0, s2, s0
+; CHECK-NEXT: fmul s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fmul float %x2, %t0
%t2 = fmul float %x3, %t1
ret float %t2
}
+define float @reassociate_muls1_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_muls1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fmul s1, s3, s2
+; CHECK-NEXT: fmul s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz float %x0, %x1
+ %t1 = fmul reassoc nsz float %x2, %t0
+ %t2 = fmul reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that scalar double-precision adds are reassociated.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
-; CHECK-STD-LABEL: reassociate_adds_double:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fadd d0, d2, d0
-; CHECK-STD-NEXT: fadd d0, d3, d0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_double:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fadd d1, d3, d2
-; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fadd d0, d2, d0
+; CHECK-NEXT: fadd d0, d3, d0
+; CHECK-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fadd double %x2, %t0
%t2 = fadd double %x3, %t1
ret double %t2
}
+define double @reassociate_adds_double_reassoc(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_adds_double_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fadd d1, d3, d2
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %x2, %t0
+ %t2 = fadd reassoc nsz double %x3, %t1
+ ret double %t2
+}
+
; Verify that scalar double-precision multiplies are reassociated.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
-; CHECK-STD-LABEL: reassociate_muls_double:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fmul d0, d2, d0
-; CHECK-STD-NEXT: fmul d0, d3, d0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_double:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fmul d1, d3, d2
-; CHECK-UNSAFE-NEXT: fmul d0, d1, d0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fmul d0, d2, d0
+; CHECK-NEXT: fmul d0, d3, d0
+; CHECK-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fmul double %x2, %t0
%t2 = fmul double %x3, %t1
ret double %t2
}
+define double @reassociate_muls_double_reassoc(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_muls_double_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fmul d1, d3, d2
+; CHECK-NEXT: fmul d0, d1, d0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz double %x0, %x1
+ %t1 = fmul reassoc nsz double %x2, %t0
+ %t2 = fmul reassoc nsz double %x3, %t1
+ ret double %t2
+}
+
; Verify that scalar half-precision adds are reassociated.
define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
-; CHECK-STD-LABEL: reassociate_adds_half:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv h0, h0, h1
-; CHECK-STD-NEXT: fadd h0, h2, h0
-; CHECK-STD-NEXT: fadd h0, h3, h0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_half:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
-; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fadd h0, h2, h0
+; CHECK-NEXT: fadd h0, h3, h0
+; CHECK-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fadd half %x2, %t0
%t2 = fadd half %x3, %t1
ret half %t2
}
+define half @reassociate_adds_half_reassoc(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-LABEL: reassociate_adds_half_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fadd h1, h3, h2
+; CHECK-NEXT: fadd h0, h1, h0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz half %x0, %x1
+ %t1 = fadd reassoc nsz half %x2, %t0
+ %t2 = fadd reassoc nsz half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar half-precision multiplies are reassociated.
define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
-; CHECK-STD-LABEL: reassociate_muls_half:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv h0, h0, h1
-; CHECK-STD-NEXT: fmul h0, h2, h0
-; CHECK-STD-NEXT: fmul h0, h3, h0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_half:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
-; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fmul h0, h2, h0
+; CHECK-NEXT: fmul h0, h3, h0
+; CHECK-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fmul half %x2, %t0
%t2 = fmul half %x3, %t1
ret half %t2
}
+define half @reassociate_muls_half_reassoc(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-LABEL: reassociate_muls_half_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fmul h1, h3, h2
+; CHECK-NEXT: fmul h0, h1, h0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz half %x0, %x1
+ %t1 = fmul reassoc nsz half %x2, %t0
+ %t2 = fmul reassoc nsz half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar integer adds are reassociated.
define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
@@ -365,173 +414,222 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; Verify that we reassociate vector instructions too.
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %t0, %x2
%t2 = fadd <4 x float> %t1, %x3
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds1_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds2:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds2:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
%t2 = fadd <4 x float> %t1, %x3
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds2_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds2_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds3:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
-; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %t0, %x2
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds3_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds3_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds4:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds4_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds4_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
; Verify that 64-bit vector half-precision adds are reassociated.
define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_v4f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h
-; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h
-; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h
+; CHECK-NEXT: fadd v0.4h, v3.4h, v0.4h
+; CHECK-NEXT: ret
%t0 = fadd <4 x half> %x0, %x1
%t1 = fadd <4 x half> %x2, %t0
%t2 = fadd <4 x half> %x3, %t1
ret <4 x half> %t2
}
+define <4 x half> @reassociate_adds_v4f16_reassoc(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
+; CHECK-LABEL: reassociate_adds_v4f16_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: fadd v1.4h, v3.4h, v2.4h
+; CHECK-NEXT: fadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x half> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x half> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x half> %x3, %t1
+ ret <4 x half> %t2
+}
+
; Verify that 128-bit vector half-precision multiplies are reassociated.
define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v8f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h
-; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h
-; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h
-; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h
-; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fmul v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: fmul v0.8h, v3.8h, v0.8h
+; CHECK-NEXT: ret
%t0 = fadd <8 x half> %x0, %x1
%t1 = fmul <8 x half> %x2, %t0
%t2 = fmul <8 x half> %x3, %t1
ret <8 x half> %t2
}
+define <8 x half> @reassociate_muls_v8f16_reassoc(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
+; CHECK-LABEL: reassociate_muls_v8f16_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fmul v1.8h, v3.8h, v2.8h
+; CHECK-NEXT: fmul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <8 x half> %x0, %x1
+ %t1 = fmul reassoc nsz <8 x half> %x2, %t0
+ %t2 = fmul reassoc nsz <8 x half> %x3, %t1
+ ret <8 x half> %t2
+}
+
; Verify that 128-bit vector single-precision multiplies are reassociated.
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v4f32:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fmul v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fmul v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fmul <4 x float> %x2, %t0
%t2 = fmul <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @reassociate_muls_v4f32_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: reassociate_muls_v4f32_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmul v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fmul reassoc nsz <4 x float> %x2, %t0
+ %t2 = fmul reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
; Verify that 128-bit vector double-precision multiplies are reassociated.
define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v2f64:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.2d, v0.2d, v1.2d
-; CHECK-STD-NEXT: fmul v0.2d, v2.2d, v0.2d
-; CHECK-STD-NEXT: fmul v0.2d, v3.2d, v0.2d
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d
-; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d
-; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v3.2d, v0.2d
+; CHECK-NEXT: ret
%t0 = fadd <2 x double> %x0, %x1
%t1 = fmul <2 x double> %x2, %t0
%t2 = fmul <2 x double> %x3, %t1
ret <2 x double> %t2
}
+define <2 x double> @reassociate_muls_v2f64_reassoc(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; CHECK-LABEL: reassociate_muls_v2f64_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v1.2d, v3.2d, v2.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <2 x double> %x0, %x1
+ %t1 = fmul reassoc nsz <2 x double> %x2, %t0
+ %t2 = fmul reassoc nsz <2 x double> %x3, %t1
+ ret <2 x double> %t2
+}
+
+
; Verify that vector integer arithmetic operations are reassociated.
define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
@@ -606,65 +704,83 @@ define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; Verify that scalable vector FP arithmetic operations are reassociated.
define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_nxv4f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd z0.h, z0.h, z1.h
-; CHECK-STD-NEXT: fadd z0.h, z2.h, z0.h
-; CHECK-STD-NEXT: fadd z0.h, z3.h, z0.h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd z0.h, z0.h, z1.h
-; CHECK-UNSAFE-NEXT: fadd z1.h, z3.h, z2.h
-; CHECK-UNSAFE-NEXT: fadd z0.h, z1.h, z0.h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.h, z0.h, z1.h
+; CHECK-NEXT: fadd z0.h, z2.h, z0.h
+; CHECK-NEXT: fadd z0.h, z3.h, z0.h
+; CHECK-NEXT: ret
%t0 = fadd reassoc <vscale x 8 x half> %x0, %x1
%t1 = fadd reassoc <vscale x 8 x half> %x2, %t0
%t2 = fadd reassoc <vscale x 8 x half> %x3, %t1
ret <vscale x 8 x half> %t2
}
+define <vscale x 8 x half> @reassociate_adds_nxv4f16_nsz(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
+; CHECK-LABEL: reassociate_adds_nxv4f16_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.h, z0.h, z1.h
+; CHECK-NEXT: fadd z1.h, z3.h, z2.h
+; CHECK-NEXT: fadd z0.h, z1.h, z0.h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <vscale x 8 x half> %x0, %x1
+ %t1 = fadd reassoc nsz <vscale x 8 x half> %x2, %t0
+ %t2 = fadd reassoc nsz <vscale x 8 x half> %x3, %t1
+ ret <vscale x 8 x half> %t2
+}
+
define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s
-; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s
-; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s
-; CHECK-UNSAFE-NEXT: fadd z1.s, z3.s, z2.s
-; CHECK-UNSAFE-NEXT: fadd z0.s, z1.s, z0.s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-NEXT: fadd z0.s, z2.s, z0.s
+; CHECK-NEXT: fadd z0.s, z3.s, z0.s
+; CHECK-NEXT: ret
%t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
%t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
%t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
ret <vscale x 4 x float> %t2
}
+define <vscale x 4 x float> @reassociate_adds_nxv4f32_nsz(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
+; CHECK-LABEL: reassociate_adds_nxv4f32_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-NEXT: fadd z1.s, z3.s, z2.s
+; CHECK-NEXT: fadd z0.s, z1.s, z0.s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <vscale x 4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <vscale x 4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <vscale x 4 x float> %x3, %t1
+ ret <vscale x 4 x float> %t2
+}
+
define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d
-; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d
-; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d
-; CHECK-UNSAFE-NEXT: fmul z1.d, z3.d, z2.d
-; CHECK-UNSAFE-NEXT: fmul z0.d, z1.d, z0.d
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: fmul z0.d, z2.d, z0.d
+; CHECK-NEXT: fmul z0.d, z3.d, z0.d
+; CHECK-NEXT: ret
%t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
%t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
%t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
ret <vscale x 2 x double> %t2
}
+define <vscale x 2 x double> @reassociate_muls_nxv2f64_nsz(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
+; CHECK-LABEL: reassociate_muls_nxv2f64_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: fmul z1.d, z3.d, z2.d
+; CHECK-NEXT: fmul z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %t0 = fmul reassoc nsz <vscale x 2 x double> %x0, %x1
+ %t1 = fmul reassoc nsz <vscale x 2 x double> %x2, %t0
+ %t2 = fmul reassoc nsz <vscale x 2 x double> %x3, %t1
+ ret <vscale x 2 x double> %t2
+}
+
; Verify that scalable vector integer arithmetic operations are reassociated.
define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
@@ -753,55 +869,30 @@ define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vsca
declare double @bar()
define double @reassociate_adds_from_calls() {
-; CHECK-STD-LABEL: reassociate_adds_from_calls:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-STD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
-; CHECK-STD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
-; CHECK-STD-NEXT: .cfi_def_cfa_offset 32
-; CHECK-STD-NEXT: .cfi_offset w30, -8
-; CHECK-STD-NEXT: .cfi_offset b8, -16
-; CHECK-STD-NEXT: .cfi_offset b9, -24
-; CHECK-STD-NEXT: .cfi_offset b10, -32
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d8, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d9, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d10, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fadd d1, d8, d9
-; CHECK-STD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
-; CHECK-STD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-STD-NEXT: fadd d1, d1, d10
-; CHECK-STD-NEXT: fadd d0, d1, d0
-; CHECK-STD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-UNSAFE-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
-; CHECK-UNSAFE-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
-; CHECK-UNSAFE-NEXT: .cfi_def_cfa_offset 32
-; CHECK-UNSAFE-NEXT: .cfi_offset w30, -8
-; CHECK-UNSAFE-NEXT: .cfi_offset b8, -16
-; CHECK-UNSAFE-NEXT: .cfi_offset b9, -24
-; CHECK-UNSAFE-NEXT: .cfi_offset b10, -32
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d8, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d9, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d10, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
-; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
-; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
-; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
-; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_from_calls:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d1, d1, d10
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
%x0 = call double @bar()
%x1 = call double @bar()
%x2 = call double @bar()
@@ -812,6 +903,41 @@ define double @reassociate_adds_from_calls() {
ret double %t2
}
+define double @reassociate_adds_from_calls_reassoc() {
+; CHECK-LABEL: reassociate_adds_from_calls_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %x0 = call reassoc nsz double @bar()
+ %x1 = call reassoc nsz double @bar()
+ %x2 = call reassoc nsz double @bar()
+ %x3 = call reassoc nsz double @bar()
+ %t0 = fadd reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %t0, %x2
+ %t2 = fadd reassoc nsz double %t1, %x3
+ ret double %t2
+}
+
define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK: // %bb.0:
@@ -846,3 +972,38 @@ define double @already_reassociated() {
%t2 = fadd double %t0, %t1
ret double %t2
}
+
+define double @already_reassociated_reassoc() {
+; CHECK-LABEL: already_reassociated_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %x0 = call reassoc nsz double @bar()
+ %x1 = call reassoc nsz double @bar()
+ %x2 = call reassoc nsz double @bar()
+ %x3 = call reassoc nsz double @bar()
+ %t0 = fadd reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %x2, %x3
+ %t2 = fadd reassoc nsz double %t0, %t1
+ ret double %t2
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.mir b/llvm/test/CodeGen/AArch64/machine-combiner.mir
index b967aaa885784..a0e1280e15cf2 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 \
# RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \
# RUN: -machine-combiner-verify-pattern-order=true -verify-machineinstrs -o - %s | FileCheck %s
---
@@ -36,8 +36,8 @@ body: |
%6 = ADDWrr %3, killed %5
%7 = SCVTFUWDri killed %6, implicit $fpcr
; CHECK: FMADDDrrr %7, %7, %0, implicit $fpcr
- %8 = FMULDrr %7, %7, implicit $fpcr
- %9 = FADDDrr %0, killed %8, implicit $fpcr
+ %8 = contract FMULDrr %7, %7, implicit $fpcr
+ %9 = contract FADDDrr %0, killed %8, implicit $fpcr
$d0 = COPY %9
RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
index cd5383388ce2d..fc5012c4af47e 100644
--- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
@@ -23,21 +23,21 @@ entry:
%scevgep = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 8, i32 0
%vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep)
%ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
- %fm1 = fmul <4 x float> %f, %ev1
- %av1 = fadd <4 x float> %f, %fm1
+ %fm1 = fmul contract <4 x float> %f, %ev1
+ %av1 = fadd contract <4 x float> %f, %fm1
%ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
- %fm2 = fmul <4 x float> %f, %ev2
- %av2 = fadd <4 x float> %f, %fm2
+ %fm2 = fmul contract <4 x float> %f, %ev2
+ %av2 = fadd contract <4 x float> %f, %fm2
%scevgep2 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 8, i32 0
tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av2, <4 x float> %av1, ptr %scevgep2)
%scevgep3 = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 12, i32 0
%vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep3)
%ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
- %fm3 = fmul <4 x float> %f, %ev3
- %av3 = fadd <4 x float> %f, %fm3
+ %fm3 = fmul contract <4 x float> %f, %ev3
+ %av3 = fadd contract <4 x float> %f, %fm3
%ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
- %fm4 = fmul <4 x float> %f, %ev4
- %av4 = fadd <4 x float> %f, %fm4
+ %fm4 = fmul contract <4 x float> %f, %ev4
+ %av4 = fadd contract <4 x float> %f, %fm4
%scevgep4 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 12, i32 0
tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av4, <4 x float> %av3, ptr %scevgep4)
ret void
@@ -49,6 +49,6 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) #2
; Function Attrs: nounwind
declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr nocapture) #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
>From be18543139934776acc3b5c8ff04fd7e10b097ad Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 28 Jul 2025 17:49:52 +0800
Subject: [PATCH 3/3] fix sqrt-fastmath
---
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/test/CodeGen/AArch64/sqrt-fastmath.ll | 53 +++++++++----------
2 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4f7bd79773f14..107ad1bc8f9fb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12290,7 +12290,8 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
- SDNodeFlags Flags = SDNodeFlags::AllowReassociation;
+ SDNodeFlags Flags =
+ SDNodeFlags::AllowReassociation | SDNodeFlags::NoSignedZeros;
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
index f73b4bdba36e6..e29993d02935a 100644
--- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -2,15 +2,15 @@
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
-declare float @llvm.sqrt.f32(float) #0
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
-declare double @llvm.sqrt.f64(double) #0
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
+declare float @llvm.sqrt.f32(float)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
-define float @fsqrt(float %a) #0 {
+define float @fsqrt(float %a) {
; FAULT-LABEL: fsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -33,7 +33,7 @@ define float @fsqrt(float %a) #0 {
ret float %1
}
-define float @fsqrt_ieee_denorms(float %a) #1 {
+define float @fsqrt_ieee_denorms(float %a) #0 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -56,7 +56,7 @@ define float @fsqrt_ieee_denorms(float %a) #1 {
ret float %1
}
-define <2 x float> @f2sqrt(<2 x float> %a) #0 {
+define <2 x float> @f2sqrt(<2 x float> %a) {
; FAULT-LABEL: f2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
@@ -79,7 +79,7 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 {
ret <2 x float> %1
}
-define <4 x float> @f4sqrt(<4 x float> %a) #0 {
+define <4 x float> @f4sqrt(<4 x float> %a) {
; FAULT-LABEL: f4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -102,7 +102,7 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 {
ret <4 x float> %1
}
-define <8 x float> @f8sqrt(<8 x float> %a) #0 {
+define <8 x float> @f8sqrt(<8 x float> %a) {
; FAULT-LABEL: f8sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -136,7 +136,7 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 {
ret <8 x float> %1
}
-define double @dsqrt(double %a) #0 {
+define double @dsqrt(double %a) {
; FAULT-LABEL: dsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -162,7 +162,7 @@ define double @dsqrt(double %a) #0 {
ret double %1
}
-define double @dsqrt_ieee_denorms(double %a) #1 {
+define double @dsqrt_ieee_denorms(double %a) #0 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -188,7 +188,7 @@ define double @dsqrt_ieee_denorms(double %a) #1 {
ret double %1
}
-define <2 x double> @d2sqrt(<2 x double> %a) #0 {
+define <2 x double> @d2sqrt(<2 x double> %a) {
; FAULT-LABEL: d2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -214,7 +214,7 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 {
ret <2 x double> %1
}
-define <4 x double> @d4sqrt(<4 x double> %a) #0 {
+define <4 x double> @d4sqrt(<4 x double> %a) {
; FAULT-LABEL: d4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -254,7 +254,7 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 {
ret <4 x double> %1
}
-define float @frsqrt(float %a) #0 {
+define float @frsqrt(float %a) {
; FAULT-LABEL: frsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -277,7 +277,7 @@ define float @frsqrt(float %a) #0 {
ret float %2
}
-define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
+define <2 x float> @f2rsqrt(<2 x float> %a) {
; FAULT-LABEL: f2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
@@ -300,7 +300,7 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
ret <2 x float> %2
}
-define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
+define <4 x float> @f4rsqrt(<4 x float> %a) {
; FAULT-LABEL: f4rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -323,7 +323,7 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
ret <4 x float> %2
}
-define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
+define <8 x float> @f8rsqrt(<8 x float> %a) {
; FAULT-LABEL: f8rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -355,7 +355,7 @@ define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
ret <8 x float> %2
}
-define double @drsqrt(double %a) #0 {
+define double @drsqrt(double %a) {
; FAULT-LABEL: drsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -381,7 +381,7 @@ define double @drsqrt(double %a) #0 {
ret double %2
}
-define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
+define <2 x double> @d2rsqrt(<2 x double> %a) {
; FAULT-LABEL: d2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -462,8 +462,8 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind {
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
-; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: fmul d0, d0, d2
; CHECK-NEXT: ret
%sqrt = call fast double @llvm.sqrt.f64(double %x)
%r = fdiv fast double %x, %sqrt
@@ -487,8 +487,8 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
-; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ret
%sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
%r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
@@ -513,9 +513,9 @@ define double @sqrt_fdiv_common_operand_extra_use(double %x, ptr %p) nounwind {
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
+; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
-; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: fcsel d2, d0, d1, eq
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: str d2, [x0]
@@ -671,5 +671,4 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, ptr %p1, ptr %p2, pt
ret double %sqrt_fast
}
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }
+attributes #0 = { "denormal-fp-math"="ieee" }
More information about the llvm-commits
mailing list