[llvm] f398739 - [FEnv] Constfold some unary constrained operations
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 27 22:31:10 PDT 2020
Author: Serge Pavlov
Date: 2020-03-28T12:28:33+07:00
New Revision: f398739152948377b81d9ce393d62c18e105699c
URL: https://github.com/llvm/llvm-project/commit/f398739152948377b81d9ce393d62c18e105699c
DIFF: https://github.com/llvm/llvm-project/commit/f398739152948377b81d9ce393d62c18e105699c.diff
LOG: [FEnv] Constfold some unary constrained operations
This change implements constant folding for the constrained versions of the
rounding intrinsics: floor, ceil, trunc, round, rint and nearbyint.
Differential Revision: https://reviews.llvm.org/D72930
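At its core, each of these folds evaluates the rounding with APFloat::roundToIntegral under a statically known rounding mode. A minimal standalone sketch of the underlying APFloat behavior (a hypothetical harness, not code from this patch):

#include "llvm/ADT/APFloat.h"
#include <cassert>

using namespace llvm;

int main() {
  // floor(10.1): round toward negative infinity, as the fold does for
  // llvm.experimental.constrained.floor.
  APFloat V(10.1);
  APFloat::opStatus St = V.roundToIntegral(APFloat::rmTowardNegative);
  assert(V.convertToDouble() == 10.0);
  // The inexact status is what blocks folding rint under fpexcept.strict:
  // a changed value means the operation would raise an 'inexact' exception.
  assert(St == APFloat::opInexact);
  return 0;
}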
Added:
llvm/test/Transforms/InstSimplify/constfold-constrained.ll
Modified:
llvm/include/llvm/IR/FPEnv.h
llvm/lib/Analysis/ConstantFolding.cpp
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/lib/IR/FPEnv.cpp
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h
index a1e0665d4112..0e7e96ff700a 100644
--- a/llvm/include/llvm/IR/FPEnv.h
+++ b/llvm/include/llvm/IR/FPEnv.h
@@ -15,6 +15,7 @@
#ifndef LLVM_IR_FLOATINGPOINT_H
#define LLVM_IR_FLOATINGPOINT_H
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include <stdint.h>
@@ -66,5 +67,8 @@ Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef);
/// input in constrained intrinsic exception behavior metadata.
Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior);
+/// Converts the rounding mode represented by fp::RoundingMode to the rounding
+/// mode used by APFloat. Returns None for fp::rmDynamic.
+Optional<APFloatBase::roundingMode> getAPFloatRoundingMode(fp::RoundingMode);
}
#endif
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 0adabf279178..5efebe2937ed 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsX86.h"
@@ -1396,41 +1397,19 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
//
bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return false;
switch (F->getIntrinsicID()) {
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::pow:
- case Intrinsic::powi:
+ // Operations that do not operate on floating-point numbers and do not
+ // depend on the FP environment can be folded even in strictfp functions.
case Intrinsic::bswap:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::copysign:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
- case Intrinsic::round:
case Intrinsic::masked_load:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
@@ -1444,9 +1423,31 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
+ case Intrinsic::bitreverse:
+ case Intrinsic::is_constant:
+ return true;
+
+ // Floating point operations cannot be folded in strictfp functions in the
+ // general case; they can be folded if the FP environment is known to the compiler.
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
- case Intrinsic::bitreverse:
+ // The intrinsics below depend on the rounding mode in MXCSR.
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
case Intrinsic::amdgcn_cubesc:
@@ -1477,14 +1478,35 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
- case Intrinsic::is_constant:
+ return !Call->isStrictFP();
+
+ // Sign operations are actually bitwise operations; they do not raise
+ // exceptions even for SNaNs.
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ // Non-constrained variants of rounding operations imply the default FP
+ // environment; they can be folded in any case.
+ case Intrinsic::ceil:
+ case Intrinsic::floor:
+ case Intrinsic::round:
+ case Intrinsic::trunc:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ // Constrained intrinsics can be folded if the FP environment is known
+ // to the compiler.
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint:
return true;
default:
return false;
case Intrinsic::not_intrinsic: break;
}
- if (!F->hasName())
+ if (!F->hasName() || Call->isStrictFP())
return false;
// In these cases, the check of the length is required. We don't want to
@@ -1792,6 +1814,55 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
}
+ // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
+ // raise FP exceptions, unless the argument is a signaling NaN.
+
+ Optional<APFloat::roundingMode> RM;
+ switch (IntrinsicID) {
+ default:
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint: {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ Optional<fp::RoundingMode> RMOp = CI->getRoundingMode();
+ if (RMOp)
+ RM = getAPFloatRoundingMode(*RMOp);
+ if (!RM)
+ return nullptr;
+ break;
+ }
+ case Intrinsic::experimental_constrained_round:
+ RM = APFloat::rmNearestTiesToAway;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ RM = APFloat::rmTowardPositive;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ RM = APFloat::rmTowardNegative;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ RM = APFloat::rmTowardZero;
+ break;
+ }
+ if (RM) {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ if (U.isFinite()) {
+ APFloat::opStatus St = U.roundToIntegral(*RM);
+ if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
+ St == APFloat::opInexact) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB == fp::ebStrict)
+ return nullptr;
+ }
+ } else if (U.isSignaling()) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB != fp::ebIgnore)
+ return nullptr;
+ U = APFloat::getQNaN(U.getSemantics());
+ }
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
@@ -2573,7 +2644,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return nullptr;
if (!F->hasName())
return nullptr;
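In outline, the new folding path in ConstantFoldScalarCall1 selects an APFloat rounding mode per intrinsic and then delegates to roundToIntegral. A condensed sketch of that decision (a hypothetical helper; the function name is invented here):

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/FPEnv.h"

using namespace llvm;

// Round V in place; returns false when the fold must be suppressed because
// it would hide an FP exception the program could observe.
static bool foldRounding(APFloat &V, APFloat::roundingMode RM,
                         fp::ExceptionBehavior EB, bool IsRint) {
  if (V.isFinite()) {
    APFloat::opStatus St = V.roundToIntegral(RM);
    // Among these operations only rint raises 'inexact', so an inexact
    // result under fpexcept.strict prevents the fold.
    return !(IsRint && St == APFloat::opInexact && EB == fp::ebStrict);
  }
  if (V.isSignaling()) {
    // An SNaN argument raises 'invalid'; it may be folded to a QNaN only
    // when exceptions are ignored.
    if (EB != fp::ebIgnore)
      return false;
    V = APFloat::getQNaN(V.getSemantics());
  }
  // QNaNs and infinities pass through unchanged without raising anything.
  return true;
}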
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index ba1f55048700..e62ddf793a13 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5368,8 +5368,11 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
ConstantArgs.reserve(NumArgs);
for (auto &Arg : Call->args()) {
Constant *C = dyn_cast<Constant>(&Arg);
- if (!C)
+ if (!C) {
+ if (isa<MetadataAsValue>(Arg.get()))
+ continue;
return nullptr;
+ }
ConstantArgs.push_back(C);
}
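This tweak is needed because constrained intrinsics pass their rounding mode and exception behavior as metadata arguments; those are MetadataAsValue operands rather than Constants, so skipping them lets the value operands still reach the folder. A small sketch of reading those operands (hypothetical helper, using accessors the patch itself relies on):

#include "llvm/ADT/Optional.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// For a call such as
//   call double @llvm.experimental.constrained.nearbyint.f64(
//       double %x, metadata !"round.upward", metadata !"fpexcept.ignore")
// the two metadata arguments are not Constants and must not abort folding.
static void inspectConstrainedCall(const CallBase *Call) {
  if (const auto *CI = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
    Optional<fp::RoundingMode> RM = CI->getRoundingMode();
    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
    (void)RM; // e.g. fp::rmUpward for !"round.upward"
    (void)EB; // e.g. fp::ebIgnore for !"fpexcept.ignore"
  }
}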
diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp
index 49b6d9a8dac3..ab68f55f834d 100644
--- a/llvm/lib/IR/FPEnv.cpp
+++ b/llvm/lib/IR/FPEnv.cpp
@@ -75,4 +75,20 @@ Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
return ExceptStr;
}
+Optional<APFloatBase::roundingMode>
+getAPFloatRoundingMode(fp::RoundingMode RM) {
+ switch (RM) {
+ case fp::rmDynamic:
+ return None;
+ case fp::rmToNearest:
+ return APFloat::rmNearestTiesToEven;
+ case fp::rmDownward:
+ return APFloat::rmTowardNegative;
+ case fp::rmUpward:
+ return APFloat::rmTowardPositive;
+ case fp::rmTowardZero:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Unexpected rounding mode");
+}
}
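A quick exercise of the mapping this helper implements (hypothetical assertions; APFloat.h is pulled in via the FPEnv.h change above):

#include "llvm/IR/FPEnv.h"
#include <cassert>

using namespace llvm;

int main() {
  // Static rounding modes translate one-to-one onto APFloat's modes.
  assert(*getAPFloatRoundingMode(fp::rmUpward) == APFloat::rmTowardPositive);
  assert(*getAPFloatRoundingMode(fp::rmDownward) == APFloat::rmTowardNegative);
  // The dynamic mode has no static counterpart; None tells callers that
  // the operation cannot be constant folded.
  assert(!getAPFloatRoundingMode(fp::rmDynamic).hasValue());
  return 0;
}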
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 0d8795c82914..1e5000a9854f 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -6373,19 +6373,17 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
@@ -6400,16 +6398,14 @@ define <2 x double> @constrained_vector_ceil_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI104_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpip 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI104_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
@@ -6421,50 +6417,16 @@ entry:
define <3 x float> @constrained_vector_ceil_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI105_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI105_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI105_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE-NEXT: xsrdpip 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE-NEXT: xsrdpip 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI105_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI105_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI105_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE9-NEXT: xsrdpip 1, 1
-; PC64LE9-NEXT: xsrdpip 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
@@ -6476,31 +6438,18 @@ entry:
define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI106_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI106_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpip 3, 1
-; PC64LE-NEXT: xvrdpip 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI106_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI106_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE9-NEXT: xsrdpip 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI106_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
@@ -6513,19 +6462,17 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_floor_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
@@ -6541,16 +6488,14 @@ define <2 x double> @constrained_vector_floor_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI108_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpim 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI108_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
@@ -6562,50 +6507,16 @@ entry:
define <3 x float> @constrained_vector_floor_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI109_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI109_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI109_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE-NEXT: xsrdpim 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE-NEXT: xsrdpim 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI109_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI109_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI109_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE9-NEXT: xsrdpim 1, 1
-; PC64LE9-NEXT: xsrdpim 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
@@ -6617,31 +6528,18 @@ entry:
define <3 x double> @constrained_vector_floor_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI110_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI110_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpim 3, 1
-; PC64LE-NEXT: xvrdpim 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI110_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI110_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE9-NEXT: xsrdpim 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI110_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
@@ -6654,19 +6552,17 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_round_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
@@ -6681,16 +6577,14 @@ define <2 x double> @constrained_vector_round_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI112_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpi 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI112_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
@@ -6702,50 +6596,16 @@ entry:
define <3 x float> @constrained_vector_round_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI113_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI113_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI113_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE-NEXT: xsrdpi 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE-NEXT: xsrdpi 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI113_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI113_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI113_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE9-NEXT: xsrdpi 1, 1
-; PC64LE9-NEXT: xsrdpi 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
@@ -6758,31 +6618,20 @@ entry:
define <3 x double> @constrained_vector_round_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: addis 4, 2, .LCPI114_1@toc@ha
; PC64LE-NEXT: addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE-NEXT: lfs 2, .LCPI114_1@toc@l(4)
; PC64LE-NEXT: lfs 1, .LCPI114_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpi 3, 1
-; PC64LE-NEXT: xvrdpi 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI114_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI114_0@toc@l(3)
+; PC64LE9-NEXT: lfs 1, .LCPI114_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE9-NEXT: xsrdpi 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 2, .LCPI114_1@toc@l(3)
+; PC64LE9-NEXT: fmr 3, 2
; PC64LE9-NEXT: blr
entry:
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
@@ -6795,19 +6644,17 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
@@ -6822,16 +6669,14 @@ define <2 x double> @constrained_vector_trunc_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI116_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpiz 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI116_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
@@ -6843,50 +6688,16 @@ entry:
define <3 x float> @constrained_vector_trunc_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI117_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI117_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI117_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE-NEXT: xsrdpiz 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE-NEXT: xsrdpiz 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI117_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI117_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI117_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE9-NEXT: xsrdpiz 1, 1
-; PC64LE9-NEXT: xsrdpiz 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
@@ -6898,31 +6709,18 @@ entry:
define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI118_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI118_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpiz 3, 1
-; PC64LE-NEXT: xvrdpiz 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI118_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI118_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE9-NEXT: xsrdpiz 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI118_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
new file mode 100644
index 000000000000..d53de99fe0b5
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -0,0 +1,244 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+
+; Verify that floor(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @floor_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that floor(-10.1) is folded to -11.0 when the exception behavior is not 'ignore'.
+define double @floor_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that ceil(10.1) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @ceil_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that ceil(-10.1) is folded to -10.0 when the exception behavior is not 'ignore'.
+define double @ceil_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that trunc(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @trunc_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that trunc(-10.1) is folded to -10.0 when the exception behavior is NOT 'ignore'.
+define double @trunc_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that round(10.5) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @round_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double 1.050000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @round_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that round(-10.5) is folded to -11.0 when the exception behavior is NOT 'ignore'.
+define double @round_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double -1.050000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @round_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 11.0 when the rounding mode is 'upward'.
+define double @nearbyint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.upward",
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'downward'.
+define double @nearbyint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.downward",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_02
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'towardzero'.
+define double @nearbyint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.towardzero",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @nearbyint_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_04
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is NOT folded if the rounding mode is 'dynamic'.
+define double @nearbyint_05() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_05
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.nearbyint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is NOT folded if the exception behavior mode is not 'ignore'.
+define double @nonfinite_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_01
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.trunc
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is folded to QNAN if the exception behavior mode is 'ignore'.
+define double @nonfinite_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_02
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(QNAN) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff8000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_03
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(+Inf) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff0000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_04
+ ; CHECK: ret double 0x7FF0000000000000
+}
+
+; Verify that rint(10) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @rint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.000000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that rint(10.1) is NOT folded to 10.0 when the exception behavior is 'strict'.
+define double @rint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_02
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.rint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that rint(10.1) is folded to 10.0 when the exception behavior is not 'strict'.
+define double @rint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+
+attributes #0 = { strictfp }
+
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata)
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+