[llvm] f398739 - [FEnv] Constfold some unary constrained operations
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 27 22:31:10 PDT 2020
Author: Serge Pavlov
Date: 2020-03-28T12:28:33+07:00
New Revision: f398739152948377b81d9ce393d62c18e105699c
URL: https://github.com/llvm/llvm-project/commit/f398739152948377b81d9ce393d62c18e105699c
DIFF: https://github.com/llvm/llvm-project/commit/f398739152948377b81d9ce393d62c18e105699c.diff
LOG: [FEnv] Constfold some unary constrained operations
This change implements constant folding for the constrained versions of the
rounding intrinsics: floor, ceil, trunc, round, rint and nearbyint.
Differential Revision: https://reviews.llvm.org/D72930
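At its core, each of these folds evaluates the rounding with APFloat::roundToIntegral under a statically known rounding mode. A minimal standalone sketch of the underlying APFloat behavior (a hypothetical harness, not code from this patch):

#include "llvm/ADT/APFloat.h"
#include <cassert>

using namespace llvm;

int main() {
  // floor(10.1): round toward negative infinity, as the fold does for
  // llvm.experimental.constrained.floor.
  APFloat V(10.1);
  APFloat::opStatus St = V.roundToIntegral(APFloat::rmTowardNegative);
  assert(V.convertToDouble() == 10.0);
  // The inexact status is what blocks folding rint under fpexcept.strict:
  // a changed value means the operation would raise an 'inexact' exception.
  assert(St == APFloat::opInexact);
  return 0;
}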
Added:
llvm/test/Transforms/InstSimplify/constfold-constrained.ll
Modified:
llvm/include/llvm/IR/FPEnv.h
llvm/lib/Analysis/ConstantFolding.cpp
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/lib/IR/FPEnv.cpp
llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h
index a1e0665d4112..0e7e96ff700a 100644
--- a/llvm/include/llvm/IR/FPEnv.h
+++ b/llvm/include/llvm/IR/FPEnv.h
@@ -15,6 +15,7 @@
#ifndef LLVM_IR_FLOATINGPOINT_H
#define LLVM_IR_FLOATINGPOINT_H
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include <stdint.h>
@@ -66,5 +67,8 @@ Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef);
/// input in constrained intrinsic exception behavior metadata.
Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior);
+/// Converts the rounding mode represented by fp::RoundingMode to the rounding
+/// mode used by APFloat. Returns None for fp::rmDynamic.
+Optional<APFloatBase::roundingMode> getAPFloatRoundingMode(fp::RoundingMode);
}
#endif
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 0adabf279178..5efebe2937ed 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsX86.h"
@@ -1396,41 +1397,19 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
//
bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return false;
switch (F->getIntrinsicID()) {
- case Intrinsic::fabs:
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::pow:
- case Intrinsic::powi:
+ // Operations that do not operate on floating-point numbers and do not
+ // depend on the FP environment can be folded even in strictfp functions.
case Intrinsic::bswap:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::copysign:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
- case Intrinsic::round:
case Intrinsic::masked_load:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
@@ -1444,9 +1423,31 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
+ case Intrinsic::bitreverse:
+ case Intrinsic::is_constant:
+ return true;
+
+ // Floating point operations cannot be folded in strictfp functions in the
+ // general case; they can be folded if the FP environment is known to the compiler.
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::sqrt:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
- case Intrinsic::bitreverse:
+ // The intrinsics below depend on the rounding mode in MXCSR.
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
case Intrinsic::amdgcn_cubesc:
@@ -1477,14 +1478,35 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
- case Intrinsic::is_constant:
+ return !Call->isStrictFP();
+
+ // Sign operations are actually bitwise operations; they do not raise
+ // exceptions even for SNaNs.
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ // Non-constrained variants of rounding operations imply the default FP
+ // environment; they can be folded in any case.
+ case Intrinsic::ceil:
+ case Intrinsic::floor:
+ case Intrinsic::round:
+ case Intrinsic::trunc:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ // Constrained intrinsics can be folded if the FP environment is known
+ // to the compiler.
+ case Intrinsic::experimental_constrained_ceil:
+ case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_round:
+ case Intrinsic::experimental_constrained_trunc:
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint:
return true;
default:
return false;
case Intrinsic::not_intrinsic: break;
}
- if (!F->hasName())
+ if (!F->hasName() || Call->isStrictFP())
return false;
// In these cases, the check of the length is required. We don't want to
@@ -1792,6 +1814,55 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
}
+ // Rounding operations (floor, trunc, ceil, round and nearbyint) do not
+ // raise FP exceptions, unless the argument is a signaling NaN.
+
+ Optional<APFloat::roundingMode> RM;
+ switch (IntrinsicID) {
+ default:
+ break;
+ case Intrinsic::experimental_constrained_nearbyint:
+ case Intrinsic::experimental_constrained_rint: {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ Optional<fp::RoundingMode> RMOp = CI->getRoundingMode();
+ if (RMOp)
+ RM = getAPFloatRoundingMode(*RMOp);
+ if (!RM)
+ return nullptr;
+ break;
+ }
+ case Intrinsic::experimental_constrained_round:
+ RM = APFloat::rmNearestTiesToAway;
+ break;
+ case Intrinsic::experimental_constrained_ceil:
+ RM = APFloat::rmTowardPositive;
+ break;
+ case Intrinsic::experimental_constrained_floor:
+ RM = APFloat::rmTowardNegative;
+ break;
+ case Intrinsic::experimental_constrained_trunc:
+ RM = APFloat::rmTowardZero;
+ break;
+ }
+ if (RM) {
+ auto CI = cast<ConstrainedFPIntrinsic>(Call);
+ if (U.isFinite()) {
+ APFloat::opStatus St = U.roundToIntegral(*RM);
+ if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
+ St == APFloat::opInexact) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB == fp::ebStrict)
+ return nullptr;
+ }
+ } else if (U.isSignaling()) {
+ Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+ if (EB && *EB != fp::ebIgnore)
+ return nullptr;
+ U = APFloat::getQNaN(U.getSemantics());
+ }
+ return ConstantFP::get(Ty->getContext(), U);
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
@@ -2573,7 +2644,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name,
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (Call->isNoBuiltin() || Call->isStrictFP())
+ if (Call->isNoBuiltin())
return nullptr;
if (!F->hasName())
return nullptr;
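In outline, the new folding path in ConstantFoldScalarCall1 selects an APFloat rounding mode per intrinsic and then delegates to roundToIntegral. A condensed sketch of that decision (a hypothetical helper; the function name is invented here):

#include "llvm/ADT/APFloat.h"
#include "llvm/IR/FPEnv.h"

using namespace llvm;

// Round V in place; returns false when the fold must be suppressed because
// it would hide an FP exception the program could observe.
static bool foldRounding(APFloat &V, APFloat::roundingMode RM,
                         fp::ExceptionBehavior EB, bool IsRint) {
  if (V.isFinite()) {
    APFloat::opStatus St = V.roundToIntegral(RM);
    // Among these operations only rint raises 'inexact', so an inexact
    // result under fpexcept.strict prevents the fold.
    return !(IsRint && St == APFloat::opInexact && EB == fp::ebStrict);
  }
  if (V.isSignaling()) {
    // An SNaN argument raises 'invalid'; it may be folded to a QNaN only
    // when exceptions are ignored.
    if (EB != fp::ebIgnore)
      return false;
    V = APFloat::getQNaN(V.getSemantics());
  }
  // QNaNs and infinities pass through unchanged without raising anything.
  return true;
}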
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index ba1f55048700..e62ddf793a13 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5368,8 +5368,11 @@ Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
ConstantArgs.reserve(NumArgs);
for (auto &Arg : Call->args()) {
Constant *C = dyn_cast<Constant>(&Arg);
- if (!C)
+ if (!C) {
+ if (isa<MetadataAsValue>(Arg.get()))
+ continue;
return nullptr;
+ }
ConstantArgs.push_back(C);
}
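This tweak is needed because constrained intrinsics pass their rounding mode and exception behavior as metadata arguments; those are MetadataAsValue operands rather than Constants, so skipping them lets the value operands still reach the folder. A small sketch of reading those operands (hypothetical helper, using accessors the patch itself relies on):

#include "llvm/ADT/Optional.h"
#include "llvm/IR/FPEnv.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// For a call such as
//   call double @llvm.experimental.constrained.nearbyint.f64(
//       double %x, metadata !"round.upward", metadata !"fpexcept.ignore")
// the two metadata arguments are not Constants and must not abort folding.
static void inspectConstrainedCall(const CallBase *Call) {
  if (const auto *CI = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
    Optional<fp::RoundingMode> RM = CI->getRoundingMode();
    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
    (void)RM; // e.g. fp::rmUpward for !"round.upward"
    (void)EB; // e.g. fp::ebIgnore for !"fpexcept.ignore"
  }
}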
diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp
index 49b6d9a8dac3..ab68f55f834d 100644
--- a/llvm/lib/IR/FPEnv.cpp
+++ b/llvm/lib/IR/FPEnv.cpp
@@ -75,4 +75,20 @@ Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
return ExceptStr;
}
+Optional<APFloatBase::roundingMode>
+getAPFloatRoundingMode(fp::RoundingMode RM) {
+ switch (RM) {
+ case fp::rmDynamic:
+ return None;
+ case fp::rmToNearest:
+ return APFloat::rmNearestTiesToEven;
+ case fp::rmDownward:
+ return APFloat::rmTowardNegative;
+ case fp::rmUpward:
+ return APFloat::rmTowardPositive;
+ case fp::rmTowardZero:
+ return APFloat::rmTowardZero;
+ }
+ llvm_unreachable("Unexpected rounding mode");
+}
}
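A quick exercise of the mapping this helper implements (hypothetical assertions; APFloat.h is pulled in via the FPEnv.h change above):

#include "llvm/IR/FPEnv.h"
#include <cassert>

using namespace llvm;

int main() {
  // Static rounding modes translate one-to-one onto APFloat's modes.
  assert(*getAPFloatRoundingMode(fp::rmUpward) == APFloat::rmTowardPositive);
  assert(*getAPFloatRoundingMode(fp::rmDownward) == APFloat::rmTowardNegative);
  // The dynamic mode has no static counterpart; None tells callers that
  // the operation cannot be constant folded.
  assert(!getAPFloatRoundingMode(fp::rmDynamic).hasValue());
  return 0;
}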
diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index 0d8795c82914..1e5000a9854f 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -6373,19 +6373,17 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI103_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI103_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI103_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
@@ -6400,16 +6398,14 @@ define <2 x double> @constrained_vector_ceil_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI104_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpip 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI104_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI104_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
@@ -6421,50 +6417,16 @@ entry:
define <3 x float> @constrained_vector_ceil_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI105_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI105_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI105_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE-NEXT: xsrdpip 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI105_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE-NEXT: xsrdpip 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE-NEXT: xsrdpip 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI105_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI105_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_2@toc@ha
-; PC64LE9-NEXT: xsrdpip 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI105_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI105_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI105_3@toc@l
-; PC64LE9-NEXT: xsrdpip 1, 1
-; PC64LE9-NEXT: xsrdpip 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
@@ -6476,31 +6438,18 @@ entry:
define <3 x double> @constrained_vector_ceil_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_ceil_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI106_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI106_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpip 3, 1
-; PC64LE-NEXT: xvrdpip 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_ceil_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI106_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI106_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI106_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI106_1@toc@l
-; PC64LE9-NEXT: xsrdpip 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpip 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI106_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
@@ -6513,19 +6462,17 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_floor_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI107_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI107_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI107_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
@@ -6541,16 +6488,14 @@ define <2 x double> @constrained_vector_floor_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI108_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpim 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI108_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI108_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
@@ -6562,50 +6507,16 @@ entry:
define <3 x float> @constrained_vector_floor_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI109_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI109_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI109_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE-NEXT: xsrdpim 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI109_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE-NEXT: xsrdpim 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE-NEXT: xsrdpim 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI109_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI109_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI109_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_2@toc@ha
-; PC64LE9-NEXT: xsrdpim 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI109_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI109_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI109_3@toc@l
-; PC64LE9-NEXT: xsrdpim 1, 1
-; PC64LE9-NEXT: xsrdpim 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI109_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
@@ -6617,31 +6528,18 @@ entry:
define <3 x double> @constrained_vector_floor_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_floor_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI110_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI110_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpim 3, 1
-; PC64LE-NEXT: xvrdpim 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_floor_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI110_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI110_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI110_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI110_1@toc@l
-; PC64LE9-NEXT: xsrdpim 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpim 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI110_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
@@ -6654,19 +6552,17 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_round_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI111_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI111_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI111_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
@@ -6681,16 +6577,14 @@ define <2 x double> @constrained_vector_round_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI112_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpi 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI112_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI112_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
@@ -6702,50 +6596,16 @@ entry:
define <3 x float> @constrained_vector_round_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI113_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI113_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI113_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE-NEXT: xsrdpi 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI113_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE-NEXT: xsrdpi 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE-NEXT: xsrdpi 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI113_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI113_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_2@toc@ha
-; PC64LE9-NEXT: xsrdpi 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI113_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI113_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI113_3@toc@l
-; PC64LE9-NEXT: xsrdpi 1, 1
-; PC64LE9-NEXT: xsrdpi 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
@@ -6758,31 +6618,20 @@ entry:
define <3 x double> @constrained_vector_round_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_round_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
+; PC64LE-NEXT: addis 4, 2, .LCPI114_1@toc@ha
; PC64LE-NEXT: addis 3, 2, .LCPI114_0@toc@ha
+; PC64LE-NEXT: lfs 2, .LCPI114_1@toc@l(4)
; PC64LE-NEXT: lfs 1, .LCPI114_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpi 3, 1
-; PC64LE-NEXT: xvrdpi 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 3, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_round_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI114_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI114_0@toc@l(3)
+; PC64LE9-NEXT: lfs 1, .LCPI114_0@toc@l(3)
; PC64LE9-NEXT: addis 3, 2, .LCPI114_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI114_1@toc@l
-; PC64LE9-NEXT: xsrdpi 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpi 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 2, .LCPI114_1@toc@l(3)
+; PC64LE9-NEXT: fmr 3, 2
; PC64LE9-NEXT: blr
entry:
%round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
@@ -6795,19 +6644,17 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v1f32:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE-NEXT: lfiwzx 0, 0, 3
+; PC64LE-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v1f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI115_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI115_0@toc@l(3)
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT: addi 3, 3, .LCPI115_0@toc@l
+; PC64LE9-NEXT: lfiwzx 0, 0, 3
+; PC64LE9-NEXT: xxpermdi 34, 0, 0, 2
; PC64LE9-NEXT: blr
entry:
%trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
@@ -6822,16 +6669,14 @@ define <2 x double> @constrained_vector_trunc_v2f64() #0 {
; PC64LE-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE-NEXT: addi 3, 3, .LCPI116_0@toc@l
; PC64LE-NEXT: lxvd2x 0, 0, 3
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xvrdpiz 34, 0
+; PC64LE-NEXT: xxswapd 34, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v2f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI116_0@toc@ha
; PC64LE9-NEXT: addi 3, 3, .LCPI116_0@toc@l
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 34, 0
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
@@ -6843,50 +6688,16 @@ entry:
define <3 x float> @constrained_vector_trunc_v3f32() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f32:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE-NEXT: addis 4, 2, .LCPI117_1@toc@ha
-; PC64LE-NEXT: lfs 0, .LCPI117_2@toc@l(3)
-; PC64LE-NEXT: lfs 1, .LCPI117_1@toc@l(4)
; PC64LE-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE-NEXT: xsrdpiz 0, 0
-; PC64LE-NEXT: lfs 2, .LCPI117_0@toc@l(3)
-; PC64LE-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE-NEXT: xsrdpiz 1, 1
-; PC64LE-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE-NEXT: xsrdpiz 2, 2
-; PC64LE-NEXT: xscvdpspn 0, 0
-; PC64LE-NEXT: xscvdpspn 1, 1
-; PC64LE-NEXT: xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT: xscvdpspn 0, 2
-; PC64LE-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT: vmrglw 2, 3, 2
-; PC64LE-NEXT: lvx 3, 0, 3
-; PC64LE-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE-NEXT: vperm 2, 4, 2, 3
+; PC64LE-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE-NEXT: lvx 2, 0, 3
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f32:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI117_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI117_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_1@toc@ha
-; PC64LE9-NEXT: lfs 1, .LCPI117_1@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_2@toc@ha
-; PC64LE9-NEXT: xsrdpiz 0, 0
-; PC64LE9-NEXT: lfs 2, .LCPI117_2@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI117_3@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI117_3@toc@l
-; PC64LE9-NEXT: xsrdpiz 1, 1
-; PC64LE9-NEXT: xsrdpiz 2, 2
-; PC64LE9-NEXT: xscvdpspn 0, 0
-; PC64LE9-NEXT: xscvdpspn 1, 1
-; PC64LE9-NEXT: xscvdpspn 2, 2
-; PC64LE9-NEXT: xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT: xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT: xxsldwi 34, 2, 2, 1
-; PC64LE9-NEXT: vmrglw 2, 3, 2
-; PC64LE9-NEXT: lxvx 35, 0, 3
-; PC64LE9-NEXT: vperm 2, 4, 2, 3
+; PC64LE9-NEXT: addi 3, 3, .LCPI117_0@toc@l
+; PC64LE9-NEXT: lxvx 34, 0, 3
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
@@ -6898,31 +6709,18 @@ entry:
define <3 x double> @constrained_vector_trunc_v3f64() #0 {
; PC64LE-LABEL: constrained_vector_trunc_v3f64:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE-NEXT: lxvd2x 0, 0, 3
; PC64LE-NEXT: addis 3, 2, .LCPI118_0@toc@ha
; PC64LE-NEXT: lfs 1, .LCPI118_0@toc@l(3)
-; PC64LE-NEXT: xxswapd 0, 0
-; PC64LE-NEXT: xsrdpiz 3, 1
-; PC64LE-NEXT: xvrdpiz 2, 0
-; PC64LE-NEXT: xxswapd 1, 2
-; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT: fmr 2, 1
+; PC64LE-NEXT: fmr 3, 1
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: constrained_vector_trunc_v3f64:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: addis 3, 2, .LCPI118_0@toc@ha
-; PC64LE9-NEXT: lfs 0, .LCPI118_0@toc@l(3)
-; PC64LE9-NEXT: addis 3, 2, .LCPI118_1@toc@ha
-; PC64LE9-NEXT: addi 3, 3, .LCPI118_1@toc@l
-; PC64LE9-NEXT: xsrdpiz 3, 0
-; PC64LE9-NEXT: lxvx 0, 0, 3
-; PC64LE9-NEXT: xvrdpiz 2, 0
-; PC64LE9-NEXT: xxswapd 1, 2
-; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT: lfs 1, .LCPI118_0@toc@l(3)
+; PC64LE9-NEXT: fmr 2, 1
+; PC64LE9-NEXT: fmr 3, 1
; PC64LE9-NEXT: blr
entry:
%trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
new file mode 100644
index 000000000000..d53de99fe0b5
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -0,0 +1,244 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+
+; Verify that floor(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @floor_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that floor(-10.1) is folded to -11.0 when the exception behavior is not 'ignore'.
+define double @floor_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.floor.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @floor_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that ceil(10.1) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @ceil_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that ceil(-10.1) is folded to -10.0 when the exception behavior is not 'ignore'.
+define double @ceil_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.ceil.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @ceil_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that trunc(10.1) is folded to 10.0 when the exception behavior is 'ignore'.
+define double @trunc_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 1.010000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that trunc(-10.1) is folded to -10.0 when the exception behavior is NOT 'ignore'.
+define double @trunc_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double -1.010000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @trunc_02
+ ; CHECK: ret double -1.000000e+01
+}
+
+; Verify that round(10.5) is folded to 11.0 when the exception behavior is 'ignore'.
+define double @round_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double 1.050000e+01,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @round_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that round(-10.5) is folded to -11.0 when the exception behavior is NOT 'ignore'.
+define double @round_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.round.f64(
+ double -1.050000e+01,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @round_02
+ ; CHECK: ret double -1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 11.0 when the rounding mode is 'upward'.
+define double @nearbyint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.upward",
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_01
+ ; CHECK: ret double 1.100000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'downward'.
+define double @nearbyint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.downward",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_02
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'towardzero'.
+define double @nearbyint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.towardzero",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @nearbyint_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_04
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that nearbyint(10.5) is NOT folded if the rounding mode is 'dynamic'.
+define double @nearbyint_05() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.nearbyint.f64(
+ double 1.050000e+01,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nearbyint_05
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.nearbyint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is NOT folded if the exception behavior mode is not 'ignore'.
+define double @nonfinite_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_01
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.trunc
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that trunc(SNAN) is folded to QNAN if the exception behavior mode is 'ignore'.
+define double @nonfinite_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff4000000000000,
+ metadata !"fpexcept.ignore") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_02
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(QNAN) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff8000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_03
+ ; CHECK: ret double 0x7FF8000000000000
+}
+
+; Verify that trunc(+Inf) is folded even if the exception behavior mode is not 'ignore'.
+define double @nonfinite_04() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.trunc.f64(
+ double 0x7ff0000000000000,
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @nonfinite_04
+ ; CHECK: ret double 0x7FF0000000000000
+}
+
+; Verify that rint(10) is folded to 10.0 when the rounding mode is 'tonearest'.
+define double @rint_01() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.000000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_01
+ ; CHECK: ret double 1.000000e+01
+}
+
+; Verify that rint(10.1) is NOT folded to 10.0 when the exception behavior is 'strict'.
+define double @rint_02() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_02
+ ; CHECK: [[VAL:%.+]] = {{.*}}call double @llvm.experimental.constrained.rint
+ ; CHECK: ret double [[VAL]]
+}
+
+; Verify that rint(10.1) is folded to 10.0 when the exception behavior is not 'strict'.
+define double @rint_03() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.rint.f64(
+ double 1.010000e+01,
+ metadata !"round.tonearest",
+ metadata !"fpexcept.maytrap") #0
+ ret double %result
+ ; CHECK-LABEL: @rint_03
+ ; CHECK: ret double 1.000000e+01
+}
+
+
+attributes #0 = { strictfp }
+
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata)
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+