[llvm] 03898b7 - [VE] Add vrcp, vrsqrt, vcvt, vmrg, and vshf intrinsic instructions

Kazushi Marukawa via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 7 03:36:01 PST 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-12-07T20:30:12+09:00
New Revision: 03898b79fb76e0a4dabf9f35d7dc51b4c497adb0

URL: https://github.com/llvm/llvm-project/commit/03898b79fb76e0a4dabf9f35d7dc51b4c497adb0
DIFF: https://github.com/llvm/llvm-project/commit/03898b79fb76e0a4dabf9f35d7dc51b4c497adb0.diff

LOG: [VE] Add vrcp, vrsqrt, vcvt, vmrg, and vshf intrinsic instructions

Add vrcp (vector reciprocal), vrsqrt (vector reciprocal square root),
vcvt (vector convert), vmrg (vector merge), and vshf (vector shuffle)
intrinsic instructions, together with regression tests for each.
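
For illustration, a minimal use of one of the new intrinsics (a sketch in
the style of the added regression tests; the function name @example is
hypothetical) passes the active vector length as the trailing i32 operand:

  define fastcc <256 x double> @example(<256 x double> %v) {
    ; lowered through the VRCPDvl pattern added in VEInstrIntrinsicVL.gen.td
    %r = tail call fast <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double> %v, i32 256)
    ret <256 x double> %r
  }

  declare <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double>, i32)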

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D92750

Added: 
    llvm/test/CodeGen/VE/VELIntrinsics/vcvt.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vrcp.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vrsqrt.ll
    llvm/test/CodeGen/VE/VELIntrinsics/vshf.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
    llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
index f8f422f4ef2b..f62d7cdafdd0 100644
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -722,3 +722,79 @@ let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : GCCBuiltin<"__builtin_
 let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>;

diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
index 643b1cd5d94a..651ea2fb9015 100644
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -943,3 +943,81 @@ def : Pat<(int_ve_vl_pvfnmsb_vvsvvl v256f64:$vy, i64:$sy, v256f64:$vw, v256f64:$
 def : Pat<(int_ve_vl_pvfnmsb_vvvvMvl v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvvvml_v v256f64:$vy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_pvfnmsb_vsvvMvl i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBrvvml_v i64:$sy, v256f64:$vz, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
 def : Pat<(int_ve_vl_pvfnmsb_vvsvMvl v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, v256f64:$pt, i32:$vl), (PVFNMSBvrvml_v v256f64:$vy, i64:$sy, v256f64:$vw, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcpd_vvl v256f64:$vy, i32:$vl), (VRCPDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcpd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrcps_vvl v256f64:$vy, i32:$vl), (VRCPSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrcps_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRCPSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrcp_vvl v256f64:$vy, i32:$vl), (PVRCPvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrcp_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRCPvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvl v256f64:$vy, i32:$vl), (VRSQRTDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrts_vvl v256f64:$vy, i32:$vl), (VRSQRTSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrts_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvl v256f64:$vy, i32:$vl), (PVRSQRTvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrt_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvl v256f64:$vy, i32:$vl), (VRSQRTDNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtdnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTDNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvl v256f64:$vy, i32:$vl), (VRSQRTSNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vrsqrtsnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VRSQRTSNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvl v256f64:$vy, i32:$vl), (PVRSQRTNEXvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvrsqrtnex_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVRSQRTNEXvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdsxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvl v256f64:$vy, i32:$vl), (VCVTWDZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWDZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwdzxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWDZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSSXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSSXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwssxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSSXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszx_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvl v256f64:$vy, i32:$vl), (VCVTWSZXvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTWSZXvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtwszxrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTWSZXvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtws_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtws_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_NONE, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvl v256f64:$vy, i32:$vl), (PVCVTWSvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTWSvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtwsrz_vvMvl v256f64:$vy, v512i1:$vm, v256f64:$pt, i32:$vl), (PVCVTWSvml_v RD_RZ, v256f64:$vy, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_NONE, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtld_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_NONE, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtld_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_NONE, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvl v256f64:$vy, i32:$vl), (VCVTLDvl RD_RZ, v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTLDvl_v RD_RZ, v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtldrz_vvmvl v256f64:$vy, v256i1:$vm, v256f64:$pt, i32:$vl), (VCVTLDvml_v RD_RZ, v256f64:$vy, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdw_vvl v256f64:$vy, i32:$vl), (VCVTDWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsw_vvl v256f64:$vy, i32:$vl), (VCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvl v256f64:$vy, i32:$vl), (PVCVTSWvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_pvcvtsw_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (PVCVTSWvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtdl_vvl v256f64:$vy, i32:$vl), (VCVTDLvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtdl_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDLvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtds_vvl v256f64:$vy, i32:$vl), (VCVTDSvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtds_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTDSvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vcvtsd_vvl v256f64:$vy, i32:$vl), (VCVTSDvl v256f64:$vy, i32:$vl)>;
+def : Pat<(int_ve_vl_vcvtsd_vvvl v256f64:$vy, v256f64:$pt, i32:$vl), (VCVTSDvl_v v256f64:$vy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGvvml v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vvvmvl v256f64:$vy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGvvml_v v256f64:$vy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGrvml i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl i64:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGrvml_v i64:$sy, v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrg_vsvml simm7:$I, v256f64:$vz, v256i1:$vm, i32:$vl), (VMRGivml (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrg_vsvmvl simm7:$I, v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VMRGivml_v (LO7 $I), v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMl v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl), (VMRGWvvml v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl)>;
+def : Pat<(int_ve_vl_vmrgw_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (VMRGWvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl), (VSHFvvrl v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, i64:$sy, v256f64:$pt, i32:$vl), (VSHFvvrl_v v256f64:$vy, v256f64:$vz, i64:$sy, i32:$vl, v256f64:$pt)>;
+def : Pat<(int_ve_vl_vshf_vvvsl v256f64:$vy, v256f64:$vz, uimm6:$N, i32:$vl), (VSHFvvil v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl)>;
+def : Pat<(int_ve_vl_vshf_vvvsvl v256f64:$vy, v256f64:$vz, uimm6:$N, v256f64:$pt, i32:$vl), (VSHFvvil_v v256f64:$vy, v256f64:$vz, (ULO7 $N), i32:$vl, v256f64:$pt)>;

diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vcvt.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vcvt.ll
new file mode 100644
index 000000000000..155a7bacbfdd
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vcvt.ll
@@ -0,0 +1,817 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector convert intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VCVT*vl, VCVT*vl_v, VCVT*vml_v, PVCVT*vl, PVCVT*vl_v, and
+;;;   PVCVT*vml_v instructions.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsx_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwdsx_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsx.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsx.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsx_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwdsx_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsx.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsx.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsx_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwdsx_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsx.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsx.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsxrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwdsxrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsxrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwdsxrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdsxrz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwdsxrz_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.sx.rz %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdsxrz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzx_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwdzx_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzx.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzx.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzx_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwdzx_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzx.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzx.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzx_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwdzx_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzx.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzx.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzxrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwdzxrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzxrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwdzxrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwdzxrz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwdzxrz_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.d.zx.rz %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwdzxrz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssx_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwssx_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssx.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssx.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssx_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwssx_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssx.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssx.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssx_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwssx_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssx.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssx.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssxrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwssxrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssxrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssxrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssxrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwssxrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssxrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssxrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwssxrz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwssxrz_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.sx.rz %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwssxrz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwssxrz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszx_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwszx_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszx.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszx.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszx_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwszx_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszx.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszx.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszx_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwszx_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszx.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszx.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszxrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtwszxrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszxrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszxrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszxrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtwszxrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszxrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszxrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtwszxrz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtwszxrz_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.w.s.zx.rz %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtwszxrz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtwszxrz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtld_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtld_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtld.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtld.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtld_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtld_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtld.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtld.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtld_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtld_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtld.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtld.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtldrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtldrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtldrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtldrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtldrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtldrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtldrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtldrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtldrz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vcvtldrz_vvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.l.d.rz %v1, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vcvtldrz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtldrz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtdw_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtdw_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.w %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtdw.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtdw.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtdw_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtdw_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.w %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtdw.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtdw.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtsw_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtsw_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.s.w %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtsw.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtsw.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtsw_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtsw_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.s.w %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtsw.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtsw.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtdl_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtdl_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.l %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtdl.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtdl.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtdl_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtdl_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.l %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtdl.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtdl.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtds_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtds_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.s %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtds.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtds.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtds_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtds_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.d.s %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtds.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtds.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtsd_vvl(<256 x double> %0) {
+; CHECK-LABEL: vcvtsd_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.s.d %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vcvtsd.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtsd.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vcvtsd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vcvtsd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vcvt.s.d %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vcvtsd.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vcvtsd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtws_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvcvtws_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvcvtws.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtws.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtws_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvcvtws_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvcvtws.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtws.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtws_vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvcvtws_vvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s %v1, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvcvtws.vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtws.vvMvl(<256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtwsrz_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvcvtwsrz_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s.rz %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvcvtwsrz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtwsrz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtwsrz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvcvtwsrz_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s.rz %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvcvtwsrz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtwsrz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtwsrz_vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvcvtwsrz_vvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.w.s.rz %v1, %v0, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvcvtwsrz.vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtwsrz.vvMvl(<256 x double>, <512 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtsw_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvcvtsw_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.s.w %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvcvtsw.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtsw.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvcvtsw_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvcvtsw_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvcvt.s.w %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvcvtsw.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvcvtsw.vvvl(<256 x double>, <256 x double>, i32)

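Aside, not part of the commit: in these VEL intrinsics the trailing i32 operand is the active vector length, and the extra <256 x double> operand in the *vvvl forms is a pass-through value, which is why the generated code above computes into a scratch register at the reduced length and then copies it over with vor at full length. Below is a minimal standalone sketch; the declaration is taken verbatim from the test, while the comment about lanes past the active length is an assumption from the pass-through convention rather than something the patch states.

declare <256 x double> @llvm.ve.vl.vcvtsd.vvvl(<256 x double>, <256 x double>, i32)

define <256 x double> @cvt_first_half(<256 x double> %v, <256 x double> %passthru) {
  ; Convert only the first 128 elements; lanes past the active
  ; length are expected to come from %passthru (assumed semantics).
  %r = tail call fast <256 x double> @llvm.ve.vl.vcvtsd.vvvl(<256 x double> %v, <256 x double> %passthru, i32 128)
  ret <256 x double> %r
}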
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll
new file mode 100644
index 000000000000..0c81db934607
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector merge intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VMRG*vvml, VMRG*vvml_v, VMRG*rvml, VMRG*rvml_v, VMRG*ivml, and
+;;;   VMRG*ivml_v instructions; a usage sketch follows this file's diff.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2) {
+; CHECK-LABEL: vmrg_vvvml:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double> %0, <256 x double> %1, <256 x i1> %2, i32 256)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double>, <256 x double>, <256 x i1>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vmrg_vvvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v2, %v0, %v1, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double> %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrg.vvvmvl(<256 x double>, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vsvml(i64 %0, <256 x double> %1, <256 x i1> %2) {
+; CHECK-LABEL: vmrg_vsvml:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 %0, <256 x double> %1, <256 x i1> %2, i32 256)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrg.vsvml(i64, <256 x double>, <256 x i1>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vmrg_vsvmvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vmrg %v1, %s0, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 %0, <256 x double> %1, <256 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64, <256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vsvml_imm(<256 x double> %0, <256 x i1> %1) {
+; CHECK-LABEL: vmrg_vsvml_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v0, 8, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvml(i64 8, <256 x double> %0, <256 x i1> %1, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrg_vsvmvl_imm(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vmrg_vsvmvl_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg %v1, 8, %v0, %vm1
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrg.vsvmvl(i64 8, <256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrgw_vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2) {
+; CHECK-LABEL: vmrgw_vvvMl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg.w %v0, %v0, %v1, %vm2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, i32 256)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrgw.vvvMl(<256 x double>, <256 x double>, <512 x i1>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vmrgw_vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) {
+; CHECK-LABEL: vmrgw_vvvMvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vmrg.w %v2, %v0, %v1, %vm2
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double> %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32)

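A similar sketch for the merge intrinsic, again not part of the commit and using the declaration verbatim from the test above. Which of the two sources a set mask bit selects is defined by the VE ISA; the comment deliberately leaves that unspecified.

declare <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double>, <256 x double>, <256 x i1>, i32)

define <256 x double> @merge_full(<256 x double> %a, <256 x double> %b, <256 x i1> %m) {
  ; Element-wise selection between %a and %b under mask %m,
  ; applied across the full vector length of 256.
  %r = tail call fast <256 x double> @llvm.ve.vl.vmrg.vvvml(<256 x double> %a, <256 x double> %b, <256 x i1> %m, i32 256)
  ret <256 x double> %r
}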
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vrcp.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vrcp.ll
new file mode 100644
index 000000000000..5b24596aa994
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vrcp.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating-point reciprocal intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VRCP*vl, VRCP*vl_v, PVRCP*vl, and PVRCP*vl_v instructions; a usage sketch follows this file's diff.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrcpd_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrcpd_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrcp.d %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrcpd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrcpd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrcp.d %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrcpd.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrcpd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrcps_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrcps_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrcp.s %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrcps.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrcps.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrcps_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrcps_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrcp.s %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrcps.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrcps.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrcp_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvrcp_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrcp %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvrcp.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrcp.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrcp_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvrcp_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrcp %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvrcp.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrcp.vvvl(<256 x double>, <256 x double>, i32)

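And for the reciprocal intrinsics, a minimal sketch (not part of the commit; declaration verbatim from the test). The mnemonic vrcp.d suggests a per-element reciprocal of double-precision values; whether the hardware result is exact or an approximation is a property of the VE ISA that this patch does not state, so the comment hedges accordingly.

declare <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double>, i32)

define <256 x double> @recip_all(<256 x double> %v) {
  ; Per-element reciprocal (possibly approximate) over all 256 lanes.
  %r = tail call fast <256 x double> @llvm.ve.vl.vrcpd.vvl(<256 x double> %v, i32 256)
  ret <256 x double> %r
}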
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vrsqrt.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vrsqrt.ll
new file mode 100644
index 000000000000..7ea47c3b614a
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vrsqrt.ll
@@ -0,0 +1,205 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector floating-point reciprocal square root intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VRSQRT*vl, VRSQRT*vl_v, PVRSQRT*vl, and PVRSQRT*vl_v
+;;;   instructions; a usage sketch follows this file's diff.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtd_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrsqrtd_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.d %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtd.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtd.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtd_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrsqrtd_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.d %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtd.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtd.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrts_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrsqrts_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.s %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrsqrts.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrts.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrts_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrsqrts_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.s %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrsqrts.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrts.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtdnex_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrsqrtdnex_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.d.nex %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtdnex.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtdnex.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtdnex_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrsqrtdnex_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.d.nex %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtdnex.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtdnex.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtsnex_vvl(<256 x double> %0) {
+; CHECK-LABEL: vrsqrtsnex_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.s.nex %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtsnex.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtsnex.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vrsqrtsnex_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vrsqrtsnex_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vrsqrt.s.nex %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vrsqrtsnex.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vrsqrtsnex.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrsqrt_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvrsqrt_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrsqrt %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvrsqrt.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrsqrt.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrsqrt_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvrsqrt_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrsqrt %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvrsqrt.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrsqrt.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrsqrtnex_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvrsqrtnex_vvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrsqrt.nex %v0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvrsqrtnex.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrsqrtnex.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @pvrsqrtnex_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvrsqrtnex_vvvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvrsqrt.nex %v1, %v0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvrsqrtnex.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvrsqrtnex.vvvl(<256 x double>, <256 x double>, i32)

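The reciprocal square-root intrinsics follow the same shape. In the sketch below (not part of the commit; declarations verbatim from the test) the plain and the nex forms are chained; reading "nex" as a no-exception variant is an assumption based on the vrsqrt.d.nex mnemonic, not something stated in the patch.

declare <256 x double> @llvm.ve.vl.vrsqrtd.vvl(<256 x double>, i32)
declare <256 x double> @llvm.ve.vl.vrsqrtdnex.vvl(<256 x double>, i32)

define <256 x double> @rsqrt_both(<256 x double> %v) {
  ; Plain reciprocal square root over all 256 lanes...
  %a = tail call fast <256 x double> @llvm.ve.vl.vrsqrtd.vvl(<256 x double> %v, i32 256)
  ; ...then the nex form (assumed: no-exception variant) applied to that result.
  %b = tail call fast <256 x double> @llvm.ve.vl.vrsqrtdnex.vvl(<256 x double> %a, i32 256)
  ret <256 x double> %b
}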
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vshf.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vshf.ll
new file mode 100644
index 000000000000..63c4185effd3
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vshf.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector shuffle intrinsic instructions
+;;;
+;;; Note:
+;;;   We test VSHF*vvrl, VSHF*vvrl_v, VSHF*vvil, and VSHF*vvil_v instructions; a usage sketch follows this file's diff.
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vshf_vvvsl(<256 x double> %0, <256 x double> %1, i64 %2) {
+; CHECK-LABEL: vshf_vvvsl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vshf %v0, %v0, %v1, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vshf.vvvsl(<256 x double> %0, <256 x double> %1, i64 %2, i32 256)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vshf.vvvsl(<256 x double>, <256 x double>, i64, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vshf_vvvsvl(<256 x double> %0, <256 x double> %1, i64 %2, <256 x double> %3) {
+; CHECK-LABEL: vshf_vvvsvl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 128
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vshf %v2, %v0, %v1, %s0
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %5 = tail call fast <256 x double> @llvm.ve.vl.vshf.vvvsvl(<256 x double> %0, <256 x double> %1, i64 %2, <256 x double> %3, i32 128)
+  ret <256 x double> %5
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vshf.vvvsvl(<256 x double>, <256 x double>, i64, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vshf_vvvsl_imm(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vshf_vvvsl_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vshf %v0, %v0, %v1, 8
+; CHECK-NEXT:    b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vshf.vvvsl(<256 x double> %0, <256 x double> %1, i64 8, i32 256)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+define fastcc <256 x double> @vshf_vvvsvl_imm(<256 x double> %0, <256 x double> %1, <256 x double> %2) {
+; CHECK-LABEL: vshf_vvvsvl_imm:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 128
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vshf %v2, %v0, %v1, 8
+; CHECK-NEXT:    lea %s16, 256
+; CHECK-NEXT:    lvl %s16
+; CHECK-NEXT:    vor %v0, (0)1, %v2
+; CHECK-NEXT:    b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vshf.vvvsvl(<256 x double> %0, <256 x double> %1, i64 8, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}

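Last, a sketch for the shuffle intrinsic (not part of the commit; declaration verbatim from the test). As the _imm tests above demonstrate, a small constant pattern such as 8 is folded into the vshf encoding as an immediate, while a variable i64 pattern is passed through a scalar register such as %s0.

declare <256 x double> @llvm.ve.vl.vshf.vvvsl(<256 x double>, <256 x double>, i64, i32)

define <256 x double> @shuffle_imm(<256 x double> %a, <256 x double> %b) {
  ; Shuffle %a and %b with constant pattern 8 over all 256 lanes.
  %r = tail call fast <256 x double> @llvm.ve.vl.vshf.vvvsl(<256 x double> %a, <256 x double> %b, i64 8, i32 256)
  ret <256 x double> %r
}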
More information about the llvm-commits mailing list