[llvm-branch-commits] [llvm] e936d1e - [VE] Add vfadd, vfsub, vfmul, and vfdiv intrinsic instructions

Kazushi Marukawa via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Dec 4 05:03:45 PST 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-12-04T21:58:51+09:00
New Revision: e936d1e11345db5d2de3d1a73bd705d8fcf99fbc

URL: https://github.com/llvm/llvm-project/commit/e936d1e11345db5d2de3d1a73bd705d8fcf99fbc
DIFF: https://github.com/llvm/llvm-project/commit/e936d1e11345db5d2de3d1a73bd705d8fcf99fbc.diff

LOG: [VE] Add vfadd, vfsub, vfmul, and vfdiv intrinsic instructions

Add vfadd, vfsub, vfmul, and vfdiv intrinsic instructions and
regression tests.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D92649
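
For illustration, a minimal LLVM IR sketch of how the new intrinsics are
invoked (not part of the patch; the function names @demo and @demo_masked
are hypothetical, but the intrinsic signatures match the declarations
added below):

  ; Unmasked double-precision vector add over all 256 elements.
  ; The trailing i32 is the active vector length.
  define fastcc <256 x double> @demo(<256 x double> %a, <256 x double> %b) {
    %r = tail call <256 x double> @llvm.ve.vl.vfaddd.vvvl(<256 x double> %a, <256 x double> %b, i32 256)
    ret <256 x double> %r
  }

  ; Masked variant: %m selects active lanes; inactive lanes take their
  ; values from the pass-through vector %pt.
  define fastcc <256 x double> @demo_masked(<256 x double> %a, <256 x double> %b, <256 x i1> %m, <256 x double> %pt) {
    %r = tail call <256 x double> @llvm.ve.vl.vfaddd.vvvmvl(<256 x double> %a, <256 x double> %b, <256 x i1> %m, <256 x double> %pt, i32 256)
    ret <256 x double> %r
  }

  declare <256 x double> @llvm.ve.vl.vfaddd.vvvl(<256 x double>, <256 x double>, i32)
  declare <256 x double> @llvm.ve.vl.vfaddd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)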

Added: 
    llvm/test/CodeGen/VE/VELIntrinsics/vfadd.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfdiv.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfmul.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vfsub.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
    llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index e1f87122237a..711fc8cddfb3 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -490,3 +490,69 @@ let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : GCCBuiltin<"__builtin_ve
 let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;

diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
index fc34a0c805d7..14790f2568f3 100644
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -711,3 +711,69 @@ def : Pat<(int_ve_vl_vsfa_vvssl v256f64:$vz, simm7:$I, i64:$sz, i32:$vl), (VSFAv
 def : Pat<(int_ve_vl_vsfa_vvssvl v256f64:$vz, simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VSFAvirl_v v256f64:$vz, (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvrrml_v v256f64:$vz, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_vsfa_vvssmvl v256f64:$vz, simm7:$I, i64:$sz, v256i1:$vm, v256f64:$pt, i32:$vl), (VSFAvirml_v v256f64:$vz, (LO7 $I), i64:$sz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFADDDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfaddd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfaddd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFADDSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFADDSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfadds_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFADDSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfadds_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFADDSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFADDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFADDrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfadd_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFADDrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfadd_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFADDrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFSUBDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFSUBSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFSUBSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfsubs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFSUBSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfsubs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFSUBSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFSUBvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFSUBrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfsub_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFSUBrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfsub_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFSUBrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFMULDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuld_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuld_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFMULSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFMULSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfmuls_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFMULSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfmuls_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFMULSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (PVFMULvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvl i64:$sy, v256f64:$vz, i32:$vl), (PVFMULrvl i64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_pvfmul_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVFMULrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvfmul_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFMULrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVDvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvl f64:$sy, v256f64:$vz, i32:$vl), (VFDIVDrvl f64:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivd_vsvvl f64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVDrvl_v f64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivd_vsvmvl f64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVDrvml_v f64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvl v256f64:$vy, v256f64:$vz, i32:$vl), (VFDIVSvvl v256f64:$vy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vvvvl v256f64:$vy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSvvl_v v256f64:$vy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvl f32:$sy, v256f64:$vz, i32:$vl), (VFDIVSrvl f32:$sy, v256f64:$vz, i32:$vl)>;
+def : Pat<(int_ve_vl_vfdivs_vsvvl f32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (VFDIVSrvl_v f32:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vfdivs_vsvmvl f32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VFDIVSrvml_v f32:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;

diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfadd.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfadd.ll
new file mode 100644
index 000000000000..c745f5bb5748
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfadd.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating add intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFADD*vvl, VFADD*vvl_v, VFADD*rvl, VFADD*rvl_v, VFADD*vvml_v,
+;;;   VFADD*rvml_v, PVFADD*vvl, PVFADD*vvl_v, PVFADD*rvl, PVFADD*rvl_v,
+;;;   PVFADD*vvml_v, and PVFADD*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfaddd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfaddd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfaddd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfaddd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfaddd_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfaddd_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfaddd_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfaddd.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfaddd.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfadds_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfadds_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfadds_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfadds_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfadds_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfadd.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfadds_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfadds_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfadd.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfadds.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfadds.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfadd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfadd %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfadd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfadd %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfadd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfadd %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfadd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfadd %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfadd_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfadd %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfadd_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfadd_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfadd %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfadd.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfadd.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)

diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfdiv.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfdiv.ll
new file mode 100644
index 000000000000..6e486c3e4154
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfdiv.ll
@@ -0,0 +1,211 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test floating divide intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFDIV*vvl, VFDIV*vvl_v, VFDIV*rvl, VFDIV*rvl_v, VFDIV*vvml_v, and
+;;;   VFDIV*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfdivd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfdivd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfdivd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfdivd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfdivd_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivd_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfdivd_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfdivd.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivd.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfdivs_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfdivs_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfdivs_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfdivs_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfdivs_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfdiv.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfdivs_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfdivs_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfdiv.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfdivs.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfdivs.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)

diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfmul.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfmul.ll
new file mode 100644
index 000000000000..6e3bb9787762
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfmul.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating multiply intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFMUL*vvl, VFMUL*vvl_v, VFMUL*rvl, VFMUL*rvl_v, VFMUL*vvml_v,
+;;;   VFMUL*rvml_v, PVFMUL*vvl, PVFMUL*vvl_v, PVFMUL*rvl, PVFMUL*rvl_v,
+;;;   PVFMUL*vvml_v, and PVFMUL*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmuld_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmuld_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfmuld_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmuld_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmuld_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuld_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmuld_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmuld.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuld.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfmuls_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmuls_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfmuls_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfmuls_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmuls_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfmul.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfmuls_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfmuls_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfmul.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfmuls.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmul_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmul %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmul_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmul %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfmul_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmul %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfmul_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmul %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmul_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfmul %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfmul_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfmul_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfmul %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfmul.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfmul.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)
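
As an aside on the passthru forms exercised above: here is a minimal usage sketch, not part of this patch. The function name is hypothetical, and it assumes the usual VEL convention that elements at and beyond the requested vector length are taken from the extra passthru vector operand:

  define fastcc <256 x double> @mul_lower_half(float %s, <256 x double> %a, <256 x double> %base) {
    ; multiply the first 128 elements of %a by the scalar %s;
    ; the remaining elements are expected to come from %base
    %r = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vsvvl(float %s, <256 x double> %a, <256 x double> %base, i32 128)
    ret <256 x double> %r
  }

The final i32 operand is the vector length that the generated code materializes with the lea/lvl pairs visible in the CHECK lines.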

diff  --git a/llvm/test/CodeGen/VE/VELIntrinsics/vfsub.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vfsub.ll
new file mode 100644
index 000000000000..31a65010f0eb
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vfsub.ll
@@ -0,0 +1,314 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating-point subtract intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VFSUB*vvl, VFSUB*vvl_v, VFSUB*rvl, VFSUB*rvl_v, VFSUB*vvml_v,
+;;;   VFSUB*rvml_v, PVFSUB*vvl, PVFSUB*vvl_v, PVFSUB*rvl, PVFSUB*rvl_v,
+;;;   PVFSUB*vvml_v, and PVFSUB*rvml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfsubd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.d %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfsubd_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.d %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vsvl(double %0, <256 x double> %1) {
+; CHECK-LABEL: vfsubd_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.d %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vsvl(double %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vsvl(double, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vsvvl(double %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfsubd_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.d %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vsvvl(double %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vsvvl(double, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfsubd_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.d %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubd_vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfsubd_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.d %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfsubd.vsvmvl(double %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubd.vsvmvl(double, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vfsubs_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.s %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfsubs_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.s %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vsvl(float %0, <256 x double> %1) {
+; CHECK-LABEL: vfsubs_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.s %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vsvl(float %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vsvl(float, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vsvvl(float %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vfsubs_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.s %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vsvvl(float %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vsvvl(float, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfsubs_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vfsub.s %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vfsubs_vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vfsubs_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vfsub.s %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vsvmvl(float %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vfsubs.vsvmvl(float, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvfsub_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfsub %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vvvl(<256 x double> %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfsub_vvvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfsub %v2, %v0, %v1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vvvvl(<256 x double> %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vvvvl(<256 x double>, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vsvl(i64 %0, <256 x double> %1) {
+; CHECK-LABEL: pvfsub_vsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfsub %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vsvl(i64 %0, <256 x double> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vsvl(i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vsvvl(i64 %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: pvfsub_vsvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfsub %v1, %s0, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vsvvl(i64 %0, <256 x double> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vsvvl(i64, <256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfsub_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvfsub %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvfsub_vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: pvfsub_vsvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    pvfsub %v1, %s0, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.pvfsub.vsvMvl(i64 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvfsub.vsvMvl(i64, <256 x double>, <512 x i1>, <256 x double>, i32)
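
For reference, a minimal sketch (hypothetical function, not part of the patch, and assuming the vfsub and vfmul declarations above are visible in one module) chaining two of the new intrinsics to compute s * (a - b) at the full vector length of 256:

  define fastcc <256 x double> @scaled_diff(float %s, <256 x double> %a, <256 x double> %b) {
    ; d = a - b, single-precision elements carried in <256 x double> vector registers
    %d = tail call fast <256 x double> @llvm.ve.vl.vfsubs.vvvl(<256 x double> %a, <256 x double> %b, i32 256)
    ; r = s * d, broadcasting the scalar through the vsvl form
    %r = tail call fast <256 x double> @llvm.ve.vl.vfmuls.vsvl(float %s, <256 x double> %d, i32 256)
    ret <256 x double> %r
  }

With -mattr=+vpu this should lower to a vfsub.s followed by a vfmul.s under a single vector length of 256, analogous to the CHECK lines above.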

More information about the llvm-branch-commits mailing list