[llvm] 23034a4 - [VE] Add vfsqrt, vfcmp, vfmax, and vfmin intrinsic instructions

Fri Dec 4 14:52:21 PST 2020

Author: Kazushi (Jam) Marukawa
Date: 2020-12-05T07:52:14+09:00
New Revision: 23034a4a63e18672be33405f7951d60a0658e67b

URL: https://github.com/llvm/llvm-project/commit/23034a4a63e18672be33405f7951d60a0658e67b
DIFF: https://github.com/llvm/llvm-project/commit/23034a4a63e18672be33405f7951d60a0658e67b.diff

LOG: [VE] Add vfsqrt, vfcmp, vfmax, and vfmin intrinsic instructions

Add vfsqrt, vfcmp, vfmax, and vfmin intrinsic instructions and
regression tests.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D92651

Added: 
    llvm/test/CodeGen/VE/VELIntrinsics/vfcmp.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfmax.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfmin.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfsqrt.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
    llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index 711fc8cddfb3..aaf8ae6e23b5 100644

--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -556,3 +556,61 @@ let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_
 let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;

diff  --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
index 14790f2568f3..1e168063da30 100644
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -777,3 +777,61 @@ def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:
 def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvl v256f64:$vy, i32:$vl), (VFSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsqrts_vvl v256f64:$vy, i32:$vl), (VFSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VFSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFCMPDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmpd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFCMPSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFCMPSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfcmps_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFCMPSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfcmps_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFCMPSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFCMPvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFCMPrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFCMPrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfcmp_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFCMPrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMAXDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMAXSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMAXSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMAXSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmaxs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMAXSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMAXvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMAXrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmax_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMAXrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmax_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMAXrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMINDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmind_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmind_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMINSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMINSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmins_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMINSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmins_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMINSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMINvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMINrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmin_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMINrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmin_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMINrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;

diff  --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfcmp.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfcmp.ll
new file mode 100644
index 000000000000..8cd540fab822
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfcmp.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating compare intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFCMP*vvl, VFCMP*vvl_v, VFCMP*rvl, VFCMP*rvl_v, VFCMP*vvml_v,
+;;;   VFCMP*rvml_v, PVFCMP*vvl, PVFCMP*vvl_v, PVFCMP*rvl, PVFCMP*rvl_v,
+;;;   PVFCMP*vvml_v, and PVFCMP*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfcmpd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfcmpd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfcmpd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfcmpd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfcmpd_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmpd_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfcmpd_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfcmpd.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmpd.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfcmps_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfcmps_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfcmps_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfcmps_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfcmps_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfcmp.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfcmps_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfcmps_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfcmp.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfcmps.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfcmps.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfcmp_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfcmp %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfcmp_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfcmp %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfcmp_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfcmp %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfcmp_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfcmp %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfcmp_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfcmp %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfcmp_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfcmp_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfcmp %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfcmp.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfcmp.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)

diff  --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfmax.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfmax.ll
new file mode 100644
index 000000000000..df820137e6fd
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfmax.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating compare and select maximum intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFMAX*vvl, VFMAX*vvl_v, VFMAX*rvl, VFMAX*rvl_v, VFMAX*vvml_v,
+;;;   VFMAX*rvml_v, PVFMAX*vvl, PVFMAX*vvl_v, PVFMAX*rvl, PVFMAX*rvl_v,
+;;;   PVFMAX*vvml_v, and PVFMAX*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmaxd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmaxd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfmaxd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmaxd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmaxd_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxd_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmaxd_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmaxd.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxd.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmaxs_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmaxs_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfmaxs_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmaxs_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmaxs_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmax.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmaxs_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmaxs_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmax.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmaxs.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmaxs.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmax_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmax %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmax_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmax %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmax_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmax %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmax_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmax %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmax_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmax %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmax_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmax_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmax %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmax.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmax.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)

diff  --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfmin.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfmin.ll
new file mode 100644
index 000000000000..ba450f348321
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfmin.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating compare and select minimum intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFMIN*vvl, VFMIN*vvl_v, VFMIN*rvl, VFMIN*rvl_v, VFMIN*vvml_v,
+;;;   VFMIN*rvml_v, PVFMIN*vvl, PVFMIN*vvl_v, PVFMIN*rvl, PVFMIN*rvl_v,
+;;;   PVFMIN*vvml_v, and PVFMIN*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmind_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmind_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfmind_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmind_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmind_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmind_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmind_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmind.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmind.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmins_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmins_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfmins_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmins_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmins_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmin.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmins_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmins_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmin.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmins.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmins.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmin_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmin %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmin_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmin %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmin_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmin %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmin_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmin %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmin_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmin %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmin_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmin_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmin %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmin.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmin.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)

diff  --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfsqrt.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfsqrt.ll
new file mode 100644
index 000000000000..29fb6088a666
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfsqrt.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating square root intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFSQRT*vl and VFSQRT*vl_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsqrtd_vvl(<256 x double> %0) {
+; CHECK-LABEL: vfsqrtd_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsqrt.d %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vfsqrtd.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsqrtd.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsqrtd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfsqrtd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsqrt.d %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsqrtd.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsqrtd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsqrts_vvl(<256 x double> %0) {
+; CHECK-LABEL: vfsqrts_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsqrt.s %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vfsqrts.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsqrts.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsqrts_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfsqrts_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsqrt.s %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsqrts.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsqrts.vvvl(<256 x double>, <256 x double>, i32)