[flang-commits] [flang] 88968f9 - [flang] Add PowerPC vec_ld, vec_lde, vec_ldl, vec_lxvp, vsx_lxvp, vec_xld2 and vec_xlw4 intrinsic

Kelvin Li via flang-commits flang-commits at lists.llvm.org
Mon Aug 14 13:59:08 PDT 2023


Author: Kelvin Li
Date: 2023-08-14T16:58:35-04:00
New Revision: 88968f9a73bcac8e2095dc6403556b72c0b470d3

URL: https://github.com/llvm/llvm-project/commit/88968f9a73bcac8e2095dc6403556b72c0b470d3
DIFF: https://github.com/llvm/llvm-project/commit/88968f9a73bcac8e2095dc6403556b72c0b470d3.diff

LOG: [flang] Add PowerPC vec_ld, vec_lde, vec_ldl, vec_lxvp, vsx_lxvp, vec_xld2 and vec_xlw4 intrinsic

Differential Revision: https://reviews.llvm.org/D157745

Added: 
    flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-load.f90

Modified: 
    flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
    flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
    flang/module/__ppc_intrinsics.f90
    flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90

Removed: 
    


################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
index d6116361a6ad7c..e5344922c2c476 100644
--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -29,6 +29,10 @@ enum class VecOp {
   Convert,
   Ctf,
   Cvf,
+  Ld,
+  Lde,
+  Ldl,
+  Lxvp,
   Mergeh,
   Mergel,
   Msub,
@@ -53,6 +57,8 @@ enum class VecOp {
   Stxv,
   Stxvp,
   Sub,
+  Xld2,
+  Xlw4,
   Xor,
   Xst,
   Xst_be,
@@ -205,6 +211,10 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
   fir::ExtendedValue genVecPerm(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args);
 
+  template <VecOp>
+  fir::ExtendedValue genVecLdCallGrp(mlir::Type resultType,
+                                     llvm::ArrayRef<fir::ExtendedValue> args);
+
   template <VecOp>
   fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args);

diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
index 708b3a57740365..f47780dd8cd8c1 100644
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -129,6 +129,26 @@ static constexpr IntrinsicHandler ppcHandlers[]{
      static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert),
      {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}},
      /*isElemental=*/true},
+    {"__ppc_vec_ld",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Ld>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lde",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Lde>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_ldl",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Ldl>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lxvp",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Lxvp>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
     {"__ppc_vec_mergeh",
      static_cast<IntrinsicLibrary::ExtendedGenerator>(
          &PI::genVecMerge<VecOp::Mergeh>),
@@ -248,6 +268,16 @@ static constexpr IntrinsicHandler ppcHandlers[]{
          &PI::genVecAddAndMulSubXor<VecOp::Sub>),
      {{{"arg1", asValue}, {"arg2", asValue}}},
      /*isElemental=*/true},
+    {"__ppc_vec_xld2_",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Xld2>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_xlw4_",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdCallGrp<VecOp::Xlw4>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
     {"__ppc_vec_xor",
      static_cast<IntrinsicLibrary::ExtendedGenerator>(
          &PI::genVecAddAndMulSubXor<VecOp::Xor>),
@@ -1293,6 +1323,132 @@ PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType,
   return builder.createConvert(loc, resultType, callOp);
 }
 
+static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
+                                      mlir::Location loc, mlir::Value baseAddr,
+                                      mlir::Value offset) {
+  auto typeExtent{fir::SequenceType::getUnknownExtent()};
+  // Construct an !fir.ref<!ref.array<?xi8>> type
+  auto arrRefTy{builder.getRefType(fir::SequenceType::get(
+      {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
+  // Convert arg to !fir.ref<!ref.array<?xi8>>
+  auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};
+
+  return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
+}
+
+static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
+                                         mlir::Location loc, mlir::Value v,
+                                         int64_t len) {
+  assert(v.getType().isa<mlir::VectorType>());
+  assert(len > 0);
+  llvm::SmallVector<int64_t, 16> mask;
+  for (int64_t i = 0; i < len; ++i) {
+    mask.push_back(len - 1 - i);
+  }
+  auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
+  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
+}
+
+// VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
+template <VecOp vop>
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
+                                     llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto context{builder.getContext()};
+  auto arg0{getBase(args[0])};
+  auto arg1{getBase(args[1])};
+
+  // Prepare the return type in FIR.
+  auto vecResTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecResTyInfo.toMlirVectorType(context)};
+  auto firTy{vecResTyInfo.toFirVectorType()};
+
+  // llvm.ppc.altivec.lvx* returns <4xi32>
+  // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type
+  const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)};
+  const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)};
+
+  // For vec_ld, need to convert arg0 from i64 to i32
+  if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64)
+    arg0 = builder.createConvert(loc, i32Ty, arg0);
+
+  // Add the %val of arg0 to %addr of arg1
+  auto addr{addOffsetToAddress(builder, loc, arg1, arg0)};
+  llvm::SmallVector<mlir::Value, 4> parsedArgs{addr};
+
+  mlir::Type intrinResTy{nullptr};
+  llvm::StringRef fname{};
+  switch (vop) {
+  case VecOp::Ld:
+    fname = "llvm.ppc.altivec.lvx";
+    intrinResTy = mVecI32Ty;
+    break;
+  case VecOp::Lde:
+    switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) {
+    case 8:
+      fname = "llvm.ppc.altivec.lvebx";
+      intrinResTy = mlirTy;
+      break;
+    case 16:
+      fname = "llvm.ppc.altivec.lvehx";
+      intrinResTy = mlirTy;
+      break;
+    case 32:
+      fname = "llvm.ppc.altivec.lvewx";
+      if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy))
+        intrinResTy = mlirTy;
+      else
+        intrinResTy = mVecI32Ty;
+      break;
+    default:
+      llvm_unreachable("invalid vector for vec_lde");
+    }
+    break;
+  case VecOp::Ldl:
+    fname = "llvm.ppc.altivec.lvxl";
+    intrinResTy = mVecI32Ty;
+    break;
+  case VecOp::Lxvp:
+    fname = "llvm.ppc.vsx.lxvp";
+    intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1));
+    break;
+  case VecOp::Xld2: {
+    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be"
+                                   : "llvm.ppc.vsx.lxvd2x";
+    // llvm.ppc.altivec.lxvd2x* returns <2 x double>
+    intrinResTy = mlir::VectorType::get(2, mlir::FloatType::getF64(context));
+  } break;
+  case VecOp::Xlw4:
+    fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be"
+                                   : "llvm.ppc.vsx.lxvw4x";
+    // llvm.ppc.altivec.lxvw4x* returns <4xi32>
+    intrinResTy = mVecI32Ty;
+    break;
+  default:
+    llvm_unreachable("invalid vector operation for generator");
+  }
+
+  auto funcType{
+      mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})};
+  auto funcOp{builder.addNamedFunction(loc, fname, funcType)};
+  auto result{
+      builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)};
+
+  if (vop == VecOp::Lxvp)
+    return result;
+
+  if (intrinResTy != mlirTy)
+    result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result);
+
+  if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE())
+    return builder.createConvert(
+        loc, firTy,
+        reverseVectorElements(builder, loc, result, vecResTyInfo.len));
+
+  return builder.createConvert(loc, firTy, result);
+}
+
 // VEC_NMADD, VEC_MSUB
 template <VecOp vop>
 fir::ExtendedValue
@@ -1782,33 +1938,6 @@ void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
   }
 }
 
-static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
-                                      mlir::Location loc, mlir::Value baseAddr,
-                                      mlir::Value offset) {
-  auto typeExtent{fir::SequenceType::getUnknownExtent()};
-  // Construct an !fir.ref<!ref.array<?xi8>> type
-  auto arrRefTy{builder.getRefType(fir::SequenceType::get(
-      {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
-  // Convert arg to !fir.ref<!ref.array<?xi8>>
-  auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};
-
-  return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
-}
-
-static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
-                                         mlir::Location loc, mlir::Value v,
-                                         int64_t len) {
-  assert(v.getType().isa<mlir::VectorType>());
-  assert(len > 0);
-  llvm::SmallVector<int64_t, 16> mask;
-  for (int64_t i = 0; i < len; ++i) {
-    mask.push_back(len - 1 - i);
-  }
-
-  auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
-  return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
-}
-
 // VEC_ST, VEC_STE
 template <VecOp vop>
 void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {

diff --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90
index 448429b668f955..c121977046ff7b 100644
--- a/flang/module/__ppc_intrinsics.f90
+++ b/flang/module/__ppc_intrinsics.f90
@@ -193,6 +193,87 @@ elemental vector(real(VKIND)) function elem_func_vr##VKIND##vr##VKIND##i0(arg1,
     !dir$ ignore_tkr(k) arg2; \
   end function ;
 
+! vector(i) function f(i, integer)
+#define FUNC_VII0I(VKIND) \
+  pure vector(integer(VKIND)) function func_vi##VKIND##i0i##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    integer(VKIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(r) function f(i, real)
+#define FUNC_VRI0R(VKIND) \
+  pure vector(real(VKIND)) function func_vr##VKIND##i0r##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    real(VKIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(i) function f(i, vector(i))
+#define FUNC_VII0VI(VKIND) \
+  pure vector(integer(VKIND)) function func_vi##VKIND##i0vi##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(integer(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(u) function f(i, vector(u))
+#define FUNC_VUI0VU(VKIND) \
+  pure vector(unsigned(VKIND)) function func_vu##VKIND##i0vu##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(unsigned(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(r) function f(i, vector(r))
+#define FUNC_VRI0VR(VKIND) \
+  pure vector(real(VKIND)) function func_vr##VKIND##i0vr##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(real(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! __vector_pair function f(i, vector(i))
+#define FUNC_VPI0VI(VKIND) \
+  pure __vector_pair function func_vpi0vi##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(integer(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function;
+
+! __vector_pair function f(i, vector(u))
+#define FUNC_VPI0VU(VKIND) \
+  pure __vector_pair function func_vpi0vu##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(unsigned(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function;
+
+! __vector_pair function f(i, vector(r))
+#define FUNC_VPI0VR(VKIND) \
+  pure __vector_pair function func_vpi0vr##VKIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    vector(real(VKIND)), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function;
+
+! __vector_pair function f(i, __vector_pair)
+#define FUNC_VPI0VP \
+  pure __vector_pair function func_vpi0vp(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    __vector_pair, intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function;
+
 ! The following macros are specific for the vec_convert(v, mold) intrinsics as
! the argument keywords are different from the other vector intrinsics.
 !
@@ -249,10 +330,28 @@ pure vector(real(VKIND)) function func_vec_convert_vr##VKIND##vi##vr##VKIND(v, m
   ELEM_FUNC_VIVII0(1) ELEM_FUNC_VIVII0(2) ELEM_FUNC_VIVII0(4) ELEM_FUNC_VIVII0(8)
   ELEM_FUNC_VUVUI0(1) ELEM_FUNC_VUVUI0(2) ELEM_FUNC_VUVUI0(4) ELEM_FUNC_VUVUI0(8)
   ELEM_FUNC_VRVRI0(4) ELEM_FUNC_VRVRI0(8)
-  
+  FUNC_VII0VI(1) FUNC_VII0VI(2) FUNC_VII0VI(4) FUNC_VII0VI(8)
+  FUNC_VUI0VU(1) FUNC_VUI0VU(2) FUNC_VUI0VU(4) FUNC_VUI0VU(8)
+  FUNC_VRI0VR(4) FUNC_VRI0VR(8)
+  FUNC_VII0I(1) FUNC_VII0I(2) FUNC_VII0I(4) FUNC_VII0I(8)
+  FUNC_VRI0R(4) FUNC_VRI0R(8)
+  FUNC_VPI0VI(1) FUNC_VPI0VI(2) FUNC_VPI0VI(4) FUNC_VPI0VI(8)
+  FUNC_VPI0VU(1) FUNC_VPI0VU(2) FUNC_VPI0VU(4) FUNC_VPI0VU(8)
+  FUNC_VPI0VR(4) FUNC_VPI0VR(8)
+  FUNC_VPI0VP
+
 #undef FUNC_VEC_CONVERT_VRVIVR
 #undef FUNC_VEC_CONVERT_VUVIVU
 #undef FUNC_VEC_CONVERT_VIVIVI
+#undef FUNC_VPI0VP
+#undef FUNC_VPI0VR
+#undef FUNC_VPI0VU
+#undef FUNC_VPI0VI
+#undef FUNC_VRI0VR
+#undef FUNC_VUI0VU
+#undef FUNC_VII0VI
+#undef FUNC_VRI0R
+#undef FUNC_VII0I
 #undef ELEM_FUNC_VRVRI0
 #undef ELEM_FUNC_VUVUI0
 #undef ELEM_FUNC_VIVII0
@@ -1055,6 +1154,154 @@ end function func_r8r8i
 #undef VU_VI_VI
 #undef VI_VI_VI
 
+!-------------------------------------------------------
+! vector function(integer, i/u/r/vector)
+!-------------------------------------------------------
+! i0 means the integer argument has ignore_tkr(k)
+#define VI_I0_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##vi##VKIND
+#define VU_I0_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0##vu##VKIND
+#define VR_I0_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##vr##VKIND
+#define VI_I0_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0##i##VKIND
+#define VR_I0_R(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0##r##VKIND
+
+#define VEC_VI_I0_VI(NAME, VKIND) \
+  procedure(func_vi##VKIND##i0##vi##VKIND) :: VI_I0_VI(NAME, VKIND);
+#define VEC_VU_I0_VU(NAME, VKIND) \
+  procedure(func_vu##VKIND##i0##vu##VKIND) :: VU_I0_VU(NAME, VKIND);
+#define VEC_VR_I0_VR(NAME, VKIND) \
+  procedure(func_vr##VKIND##i0##vr##VKIND) :: VR_I0_VR(NAME, VKIND);
+#define VEC_VI_I0_I(NAME, VKIND) \
+  procedure(func_vi##VKIND##i0##i##VKIND) :: VI_I0_I(NAME, VKIND);
+#define VEC_VR_I0_R(NAME, VKIND) \
+  procedure(func_vr##VKIND##i0##r##VKIND) :: VR_I0_R(NAME, VKIND);
+
+! vec_ld
+  VEC_VI_I0_VI(vec_ld,1) VEC_VI_I0_VI(vec_ld,2) VEC_VI_I0_VI(vec_ld,4)
+  VEC_VU_I0_VU(vec_ld,1) VEC_VU_I0_VU(vec_ld,2) VEC_VU_I0_VU(vec_ld,4)
+  VEC_VR_I0_VR(vec_ld,4)
+  VEC_VI_I0_I(vec_ld,1) VEC_VI_I0_I(vec_ld,2) VEC_VI_I0_I(vec_ld,4)
+  VEC_VR_I0_R(vec_ld,4)
+  interface vec_ld
+    procedure :: VI_I0_VI(vec_ld,1), VI_I0_VI(vec_ld,2), VI_I0_VI(vec_ld,4)
+    procedure :: VU_I0_VU(vec_ld,1), VU_I0_VU(vec_ld,2), VU_I0_VU(vec_ld,4)
+    procedure :: VR_I0_VR(vec_ld,4)
+    procedure :: VI_I0_I(vec_ld,1), VI_I0_I(vec_ld,2), VI_I0_I(vec_ld,4)
+    procedure :: VR_I0_R(vec_ld,4)
+  end interface
+  public :: vec_ld
+
+! vec_lde
+  VEC_VI_I0_I(vec_lde,1) VEC_VI_I0_I(vec_lde,2) VEC_VI_I0_I(vec_lde,4)
+  VEC_VR_I0_R(vec_lde,4)
+  interface vec_lde
+    procedure :: VI_I0_I(vec_lde,1), VI_I0_I(vec_lde,2), VI_I0_I(vec_lde,4)
+    procedure :: VR_I0_R(vec_lde,4)
+  end interface
+  public :: vec_lde
+
+! vec_ldl
+  VEC_VI_I0_VI(vec_ldl,1) VEC_VI_I0_VI(vec_ldl,2) VEC_VI_I0_VI(vec_ldl,4)
+  VEC_VU_I0_VU(vec_ldl,1) VEC_VU_I0_VU(vec_ldl,2) VEC_VU_I0_VU(vec_ldl,4)
+  VEC_VR_I0_VR(vec_ldl,4)
+  VEC_VI_I0_I(vec_ldl,1) VEC_VI_I0_I(vec_ldl,2) VEC_VI_I0_I(vec_ldl,4)
+  VEC_VR_I0_R(vec_ldl,4)
+  interface vec_ldl
+    procedure :: VI_I0_VI(vec_ldl,1), VI_I0_VI(vec_ldl,2), VI_I0_VI(vec_ldl,4)
+    procedure :: VU_I0_VU(vec_ldl,1), VU_I0_VU(vec_ldl,2), VU_I0_VU(vec_ldl,4)
+    procedure :: VR_I0_VR(vec_ldl,4)
+    procedure :: VI_I0_I(vec_ldl,1), VI_I0_I(vec_ldl,2), VI_I0_I(vec_ldl,4)
+    procedure :: VR_I0_R(vec_ldl,4)
+  end interface
+  public :: vec_ldl
+
+! vec_xld2
+  VEC_VI_I0_VI(vec_xld2_,1) VEC_VI_I0_VI(vec_xld2_,2) VEC_VI_I0_VI(vec_xld2_,4) VEC_VI_I0_VI(vec_xld2_,8)
+  VEC_VU_I0_VU(vec_xld2_,1) VEC_VU_I0_VU(vec_xld2_,2) VEC_VU_I0_VU(vec_xld2_,4) VEC_VU_I0_VU(vec_xld2_,8)
+  VEC_VR_I0_VR(vec_xld2_,4) VEC_VR_I0_VR(vec_xld2_,8)
+  VEC_VI_I0_I(vec_xld2_,1) VEC_VI_I0_I(vec_xld2_,2) VEC_VI_I0_I(vec_xld2_,4) VEC_VI_I0_I(vec_xld2_,8)
+  VEC_VR_I0_R(vec_xld2_,4) VEC_VR_I0_R(vec_xld2_,8)
+  interface vec_xld2
+    procedure :: VI_I0_VI(vec_xld2_,1), VI_I0_VI(vec_xld2_,2), VI_I0_VI(vec_xld2_,4), VI_I0_VI(vec_xld2_,8)
+    procedure :: VU_I0_VU(vec_xld2_,1), VU_I0_VU(vec_xld2_,2), VU_I0_VU(vec_xld2_,4), VU_I0_VU(vec_xld2_,8)
+    procedure :: VR_I0_VR(vec_xld2_,4), VR_I0_VR(vec_xld2_,8)
+    procedure :: VI_I0_I(vec_xld2_,1), VI_I0_I(vec_xld2_,2), VI_I0_I(vec_xld2_,4), VI_I0_I(vec_xld2_,8)
+    procedure :: VR_I0_R(vec_xld2_,4), VR_I0_R(vec_xld2_,8)
+  end interface
+  public :: vec_xld2
+
+! vec_xlw4
+  VEC_VI_I0_VI(vec_xlw4_,1) VEC_VI_I0_VI(vec_xlw4_,2)
+  VEC_VU_I0_VU(vec_xlw4_,1) VEC_VU_I0_VU(vec_xlw4_,2) VEC_VU_I0_VU(vec_xlw4_,4)
+  VEC_VR_I0_VR(vec_xlw4_,4)
+  VEC_VI_I0_I(vec_xlw4_,1) VEC_VI_I0_I(vec_xlw4_,2) VEC_VI_I0_I(vec_xlw4_,4)
+  VEC_VR_I0_R(vec_xlw4_,4)
+  interface vec_xlw4
+    procedure :: VI_I0_VI(vec_xlw4_,1), VI_I0_VI(vec_xlw4_,2)
+    procedure :: VU_I0_VU(vec_xlw4_,1), VU_I0_VU(vec_xlw4_,2), VU_I0_VU(vec_xlw4_,4)
+    procedure :: VR_I0_VR(vec_xlw4_,4)
+    procedure :: VI_I0_I(vec_xlw4_,1), VI_I0_I(vec_xlw4_,2), VI_I0_I(vec_xlw4_,4)
+    procedure :: VR_I0_R(vec_xlw4_,4)
+  end interface
+  public :: vec_xlw4
+
+#undef VEC_VR_I0_R
+#undef VEC_VI_I0_I
+#undef VEC_VR_I0_VR
+#undef VEC_VU_I0_VU
+#undef VEC_VI_I0_VI
+#undef VR_I0_R
+#undef VI_I0_I
+#undef VR_I0_VR
+#undef VU_I0_VU
+#undef VI_I0_VI
+
+!-------------------------------------------------------
+! __vector_pair function(integer, vector/__vector_pair)
+!-------------------------------------------------------
+#define VP_I0_VI(NAME, VKIND) __ppc_##NAME##_vpi0##vi##VKIND
+#define VP_I0_VU(NAME, VKIND) __ppc_##NAME##_vpi0##vu##VKIND
+#define VP_I0_VR(NAME, VKIND) __ppc_##NAME##_vpi0##vr##VKIND
+#define VP_I0_VP(NAME) __ppc_##NAME##_vpi0vp0
+
+#define VEC_VP_I0_VI(NAME, VKIND) \
+  procedure(func_vpi0vi##VKIND) :: VP_I0_VI(NAME, VKIND);
+#define VEC_VP_I0_VU(NAME, VKIND) \
+  procedure(func_vpi0vu##VKIND) :: VP_I0_VU(NAME, VKIND);
+#define VEC_VP_I0_VR(NAME, VKIND) \
+  procedure(func_vpi0vr##VKIND) :: VP_I0_VR(NAME, VKIND);
+#define VEC_VP_I0_VP(NAME) procedure(func_vpi0vp) :: VP_I0_VP(NAME);
+
+! vec_lxvp
+  VEC_VP_I0_VI(vec_lxvp,1) VEC_VP_I0_VI(vec_lxvp,2) VEC_VP_I0_VI(vec_lxvp,4) VEC_VP_I0_VI(vec_lxvp,8)
+  VEC_VP_I0_VU(vec_lxvp,1) VEC_VP_I0_VU(vec_lxvp,2) VEC_VP_I0_VU(vec_lxvp,4) VEC_VP_I0_VU(vec_lxvp,8)
+  VEC_VP_I0_VR(vec_lxvp,4) VEC_VP_I0_VR(vec_lxvp,8)
+  VEC_VP_I0_VP(vec_lxvp)
+  interface vec_lxvp
+     procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8)
+     procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8)
+     procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8)
+     procedure :: VP_I0_VP(vec_lxvp)
+  end interface vec_lxvp
+  public :: vec_lxvp
+
+! vsx_lxvp (alias to vec_lxvp)
+  interface vsx_lxvp
+     procedure :: VP_I0_VI(vec_lxvp,1), VP_I0_VI(vec_lxvp,2), VP_I0_VI(vec_lxvp,4), VP_I0_VI(vec_lxvp,8)
+     procedure :: VP_I0_VU(vec_lxvp,1), VP_I0_VU(vec_lxvp,2), VP_I0_VU(vec_lxvp,4), VP_I0_VU(vec_lxvp,8)
+     procedure :: VP_I0_VR(vec_lxvp,4), VP_I0_VR(vec_lxvp,8)
+     procedure :: VP_I0_VP(vec_lxvp)
+  end interface vsx_lxvp
+  public :: vsx_lxvp
+
+#undef VEC_VP_I0_VP
+#undef VEC_VP_I0_VR
+#undef VEC_VP_I0_VU
+#undef VEC_VP_I0_VI
+#undef VP_I0_VP
+#undef VP_I0_VR
+#undef VP_I0_VU
+#undef VP_I0_VI
+
 !-----------------------------------------
 ! vector function(vector, vector, vector)
 !-----------------------------------------

diff --git a/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90 b/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
index ec16181f9d6cd6..92875567503ba3 100644
--- a/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
+++ b/flang/test/Lower/PowerPC/ppc-pwr10-vec-intrinsics.f90
@@ -26,6 +26,248 @@ end subroutine test_cvbf16spn
 !CHECK:  %4 = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %3)
 !CHECK:  store <16 x i8> %4, ptr %1, align 16
 
+!----------------------
+! vec_lxvp
+!----------------------
+
+      subroutine vec_lxvp_test_i2(v1, offset, vp)
+      integer(2) :: offset
+      vector(integer(2)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_i2
+
+!CHECK-LABEL: @vec_lxvp_test_i2_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_i4(v1, offset, vp)
+      integer(2) :: offset
+      vector(integer(4)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_i4
+
+!CHECK-LABEL: @vec_lxvp_test_i4_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_u2(v1, offset, vp)
+      integer(2) :: offset
+      vector(unsigned(2)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_u2
+
+!CHECK-LABEL: @vec_lxvp_test_u2_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_u4(v1, offset, vp)
+      integer(2) :: offset
+      vector(unsigned(4)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_u4
+
+!CHECK-LABEL: @vec_lxvp_test_u4_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_r4(v1, offset, vp)
+      integer(2) :: offset
+      vector(real(4)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_r4
+
+!CHECK-LABEL: @vec_lxvp_test_r4_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_r8(v1, offset, vp)
+      integer(2) :: offset
+      vector(real(8)) :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_r8
+
+!CHECK-LABEL: @vec_lxvp_test_r8_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_vp(v1, offset, vp)
+      integer(2) :: offset
+      __vector_pair :: v1
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_vp
+
+!CHECK-LABEL: @vec_lxvp_test_vp_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_i2_arr(v1, offset, vp)
+      integer :: offset
+      vector(integer(2)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_i2_arr
+
+!CHECK-LABEL: @vec_lxvp_test_i2_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_i4_arr(v1, offset, vp)
+      integer :: offset
+      vector(integer(4)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_i4_arr
+
+!CHECK-LABEL: @vec_lxvp_test_i4_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_u2_arr(v1, offset, vp)
+      integer :: offset
+      vector(unsigned(2)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_u2_arr
+
+!CHECK-LABEL: @vec_lxvp_test_u2_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_u4_arr(v1, offset, vp)
+      integer :: offset
+      vector(unsigned(4)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_u4_arr
+
+!CHECK-LABEL: @vec_lxvp_test_u4_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_r4_arr(v1, offset, vp)
+      integer :: offset
+      vector(real(4)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_r4_arr
+
+!CHECK-LABEL: @vec_lxvp_test_r4_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_r8_arr(v1, offset, vp)
+      integer :: offset
+      vector(real(8)) :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_r8_arr
+
+!CHECK-LABEL: @vec_lxvp_test_r8_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vec_lxvp_test_vp_arr(v1, offset, vp)
+      integer(8) :: offset
+      __vector_pair :: v1(10)
+      __vector_pair :: vp
+      vp = vec_lxvp(offset, v1)
+      end subroutine vec_lxvp_test_vp_arr
+
+!CHECK-LABEL: @vec_lxvp_test_vp_arr_
+!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+!----------------------
+! vsx_lxvp
+!----------------------
+
+      subroutine vsx_lxvp_test_i4(v1, offset, vp)
+      integer(2) :: offset
+      vector(integer(4)) :: v1
+      __vector_pair :: vp
+      vp = vsx_lxvp(offset, v1)
+      end subroutine vsx_lxvp_test_i4
+
+!CHECK-LABEL: @vsx_lxvp_test_i4_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vsx_lxvp_test_r8(v1, offset, vp)
+      integer(2) :: offset
+      vector(real(8)) :: v1
+      __vector_pair :: vp
+      vp = vsx_lxvp(offset, v1)
+      end subroutine vsx_lxvp_test_r8
+
+!CHECK-LABEL: @vsx_lxvp_test_r8_
+!CHECK:  %[[offset:.*]] = load i16, ptr %1, align 2
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i16 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vsx_lxvp_test_i2_arr(v1, offset, vp)
+      integer :: offset
+      vector(integer(2)) :: v1(10)
+      __vector_pair :: vp
+      vp = vsx_lxvp(offset, v1)
+      end subroutine vsx_lxvp_test_i2_arr
+
+!CHECK-LABEL: @vsx_lxvp_test_i2_arr_
+!CHECK:  %[[offset:.*]] = load i32, ptr %1, align 4
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i32 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
+      subroutine vsx_lxvp_test_vp_arr(v1, offset, vp)
+      integer(8) :: offset
+      __vector_pair :: v1(10)
+      __vector_pair :: vp
+      vp = vsx_lxvp(offset, v1)
+      end subroutine vsx_lxvp_test_vp_arr
+
+!CHECK-LABEL: @vsx_lxvp_test_vp_arr_
+!CHECK:  %[[offset:.*]] = load i64, ptr %1, align 8
+!CHECK:  %[[addr:.*]] = getelementptr i8, ptr %0, i64 %[[offset]]
+!CHECK:  %[[call:.*]] = call <256 x i1> @llvm.ppc.vsx.lxvp(ptr %[[addr]])
+!CHECK:  store <256 x i1> %[[call]], ptr %2, align 32
+
 !----------------------
 ! vec_stxvp
 !----------------------

diff --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
new file mode 100644
index 00000000000000..995d5376a77652
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@@ -0,0 +1,522 @@
+! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!-------------------
+! vec_ld
+!-------------------
+
+! CHECK-LABEL: @vec_ld_testi8
+subroutine vec_ld_testi8(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8>
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[bc]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testi8
+
+! CHECK-LABEL: @vec_ld_testi16
+subroutine vec_ld_testi16(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16>
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[bc]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testi16
+
+! CHECK-LABEL: @vec_ld_testi32
+subroutine vec_ld_testi32(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(integer(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testi32
+
+! CHECK-LABEL: @vec_ld_testf32
+subroutine vec_ld_testf32(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testf32
+
+! CHECK-LABEL: @vec_ld_testu32
+subroutine vec_ld_testu32(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(unsigned(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testu32
+
+! CHECK-LABEL: @vec_ld_testi32a
+subroutine vec_ld_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(10)
+  vector(integer(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testi32a
+
+! CHECK-LABEL: @vec_ld_testf32av
+subroutine vec_ld_testf32av(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2(2, 4, 8)
+  vector(real(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[i4:.*]] = fir.convert %[[arg1]] : (i64) -> i32
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[i4]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[i4:.*]] = trunc i64 %[[arg1]] to i32
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[i4]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testf32av
+
+! CHECK-LABEL: @vec_ld_testi32s
+subroutine vec_ld_testi32s(arg1, arg2, res)
+  integer(4) :: arg1
+  real(4) :: arg2
+  vector(real(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_ld_testi32s
+
+!-------------------
+! vec_lde
+!-------------------
+
+! CHECK-LABEL: @vec_lde_testi8s
+subroutine vec_lde_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(integer(1)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_lde_testi8s
+
+! CHECK-LABEL: @vec_lde_testi16a
+subroutine vec_lde_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 11, 7)
+  vector(integer(2)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x11x7xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<8xi16>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_lde_testi16a
+
+! CHECK-LABEL: @vec_lde_testi32a
+subroutine vec_lde_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(5)
+  vector(integer(4)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<5xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ld]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_lde_testi32a
+
+! CHECK-LABEL: @vec_lde_testf32a
+subroutine vec_lde_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(11)
+  vector(real(4)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[bc]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[bc]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_lde_testf32a
+
+!-------------------
+! vec_xld2
+!-------------------
+
+! CHECK-LABEL: @vec_xld2_testi8a
+subroutine vec_xld2_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi8a
+
+! CHECK-LABEL: @vec_xld2_testi16a
+subroutine vec_xld2_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2(4)
+  vector(integer(2)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16>
+! LLVMIR:  store <8 x i16> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi16a
+
+! CHECK-LABEL: @vec_xld2_testi32a
+subroutine vec_xld2_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(integer(4)) :: arg2(11)
+  vector(integer(4)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11x!fir.vector<4:i32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi32a
+
+! CHECK-LABEL: @vec_xld2_testi64a
+subroutine vec_xld2_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(integer(8)) :: arg2(31,7)
+  vector(integer(8)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<31x7x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi64a
+
+! CHECK-LABEL: @vec_xld2_testf32a
+subroutine vec_xld2_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(real(4)) :: arg2(5)
+  vector(real(4)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<5x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testf32a
+
+! CHECK-LABEL: @vec_xld2_testf64a
+subroutine vec_xld2_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(8)) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xld2_testf64a
+
+!-------------------
+! vec_xlw4
+!-------------------
+
+! CHECK-LABEL: @vec_xlw4_testi8a
+subroutine vec_xlw4_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2(2, 11, 37)
+  vector(integer(1)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x11x37x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16
+end subroutine vec_xlw4_testi8a
+
+! CHECK-LABEL: @vec_xlw4_testi16a
+subroutine vec_xlw4_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2(2, 8)
+  vector(integer(2)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x8x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16
+end subroutine vec_xlw4_testi16a
+
+! CHECK-LABEL: @vec_xlw4_testu32a
+subroutine vec_xlw4_testu32a(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(unsigned(4)) :: arg2(8, 4)
+  vector(unsigned(4)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<8x4x!fir.vector<4:ui32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xlw4_testu32a
+
+! CHECK-LABEL: @vec_xlw4_testf32a
+subroutine vec_xlw4_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(real(4)) :: arg2
+  vector(real(4)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xlw4_testf32a

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-load.f90 b/flang/test/Lower/PowerPC/ppc-vec-load.f90
new file mode 100644
index 00000000000000..ed08f60789a0ac
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90
@@ -0,0 +1,655 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="FIR" %s
+! RUN: %flang -emit-llvm -S %s -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_ld
+!----------------------
+
+! CHECK-LABEL: @vec_ld_testi8
+subroutine vec_ld_testi8(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testi8
+
+! CHECK-LABEL: @vec_ld_testi16
+subroutine vec_ld_testi16(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testi16
+
+! CHECK-LABEL: @vec_ld_testi32
+subroutine vec_ld_testi32(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(integer(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testi32
+
+! CHECK-LABEL: @vec_ld_testf32
+subroutine vec_ld_testf32(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testf32
+
+! CHECK-LABEL: @vec_ld_testu32
+subroutine vec_ld_testu32(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(unsigned(4)) :: arg2, res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16
+end subroutine vec_ld_testu32
+
+! CHECK-LABEL: @vec_ld_testi32a
+subroutine vec_ld_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(10)
+  vector(integer(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16
+end subroutine vec_ld_testi32a
+
+! CHECK-LABEL: @vec_ld_testf32av
+subroutine vec_ld_testf32av(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2(2, 4, 8)
+  vector(real(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+! FIR: %[[arg1i32:.*]] = fir.convert %[[arg1]] : (i64) -> i32
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1i32]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[arg1i32:.*]] = trunc i64 %[[arg1]] to i32
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1i32]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testf32av
+
+! CHECK-LABEL: @vec_ld_testi32s
+subroutine vec_ld_testi32s(arg1, arg2, res)
+  integer(4) :: arg1
+  real(4) :: arg2
+  vector(real(4)) :: res
+  res = vec_ld(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ld_testi32s
+
+!----------------------
+! vec_lde
+!----------------------
+
+! CHECK-LABEL: @vec_lde_testi8s
+subroutine vec_lde_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(integer(1)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvebx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <16 x i8> @llvm.ppc.altivec.lvebx(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[call]], ptr %2, align 16
+end subroutine vec_lde_testi8s
+
+! CHECK-LABEL: @vec_lde_testi16a
+subroutine vec_lde_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvehx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<8xi16>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <8 x i16> @llvm.ppc.altivec.lvehx(ptr %[[addr]])
+! LLVMIR: store <8 x i16> %[[call]], ptr %2, align 16
+end subroutine vec_lde_testi16a
+
+! CHECK-LABEL: @vec_lde_testi32a
+subroutine vec_lde_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(4)
+  vector(integer(4)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16
+end subroutine vec_lde_testi32a
+
+! CHECK-LABEL: @vec_lde_testf32a
+subroutine vec_lde_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_lde(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvewx(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvewx(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_lde_testf32a
+
+!----------------------
+! vec_ldl
+!----------------------
+
+! CHECK-LABEL: @vec_ldl_testi8
+subroutine vec_ldl_testi8(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2, res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<16xi8>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %{{.*}}, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testi8
+
+! CHECK-LABEL: @vec_ldl_testi16
+subroutine vec_ldl_testi16(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2, res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<8xi16>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testi16
+
+! CHECK-LABEL: @vec_ldl_testi32
+subroutine vec_ldl_testi32(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(integer(4)) :: arg2, res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[bc:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testi32
+
+! CHECK-LABEL: @vec_ldl_testf32
+subroutine vec_ldl_testf32(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2, res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testf32
+
+! CHECK-LABEL: @vec_ldl_testu32
+subroutine vec_ldl_testu32(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(unsigned(4)) :: arg2, res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16
+end subroutine vec_ldl_testu32
+
+! CHECK-LABEL: @vec_ldl_testi32a
+subroutine vec_ldl_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(10)
+  vector(integer(4)) :: res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[cnv:.*]] = fir.convert %[[call]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[call]], ptr %2, align 16
+end subroutine vec_ldl_testi32a
+
+! CHECK-LABEL: @vec_ldl_testf32av
+subroutine vec_ldl_testf32av(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(4)) :: arg2(2, 4, 8)
+  vector(real(4)) :: res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testf32av
+
+! CHECK-LABEL: @vec_ldl_testi32s
+subroutine vec_ldl_testi32s(arg1, arg2, res)
+  integer(4) :: arg1
+  real(4) :: arg2
+  vector(real(4)) :: res
+  res = vec_ldl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %{{.*}} : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[call:.*]] = fir.call @llvm.ppc.altivec.lvxl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[call]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[cnv:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[cnv]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[call:.*]] = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[call]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_ldl_testi32s
+
+!----------------------
+! vec_xld2
+!----------------------
+
+! CHECK-LABEL: @vec_xld2_testi8a
+subroutine vec_xld2_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi8a
+
+! CHECK-LABEL: @vec_xld2_testi16
+subroutine vec_xld2_testi16(arg1, arg2, res)
+  integer :: arg1
+  vector(integer(2)) :: arg2
+  vector(integer(2)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi16
+
+! CHECK-LABEL: @vec_xld2_testi32a
+subroutine vec_xld2_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(integer(4)) :: arg2(41)
+  vector(integer(4)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<41x!fir.vector<4:i32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x i32>
+! LLVMIR: store <4 x i32> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi32a
+
+! CHECK-LABEL: @vec_xld2_testi64a
+subroutine vec_xld2_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(integer(8)) :: arg2(4)
+  vector(integer(8)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testi64a
+
+! CHECK-LABEL: @vec_xld2_testf32a
+subroutine vec_xld2_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(real(4)) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xld2_testf32a
+
+! CHECK-LABEL: @vec_xld2_testf64a
+subroutine vec_xld2_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(8)) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xld2(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xld2_testf64a
+
+!----------------------
+! vec_xlw4
+!----------------------
+
+! CHECK-LABEL: @vec_xlw4_testi8a
+subroutine vec_xlw4_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  vector(integer(1)) :: arg2(2, 4, 8)
+  vector(integer(1)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<16:i8>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <16 x i8>
+! LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16
+end subroutine vec_xlw4_testi8a
+
+! CHECK-LABEL: @vec_xlw4_testi16a
+! Test: vec_xlw4 with an integer(2) byte offset into a vector(integer(2))
+! array. Same lowering shape as the i8 case: byte-offset GEP, call to
+! llvm.ppc.vsx.lxvw4x returning <4 x i32>, then a bitcast to the <8 x i16>
+! destination element type.
+subroutine vec_xlw4_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(integer(2)) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<8:i16>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <8 x i16>
+! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16
+end subroutine vec_xlw4_testi16a
+
+! CHECK-LABEL: @vec_xlw4_testu32a
+! Test: vec_xlw4 with an integer(4) byte offset into a vector(unsigned(4))
+! array. Unlike the other element widths, no bitcast is expected here:
+! llvm.ppc.vsx.lxvw4x already returns <4 x i32>, which matches the 4 x
+! 32-bit destination, so the result is stored (after only a FIR type
+! conversion) directly.
+subroutine vec_xlw4_testu32a(arg1, arg2, res)
+  integer(4) :: arg1
+  vector(unsigned(4)) :: arg2(2, 4, 8)
+  vector(unsigned(4)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8x!fir.vector<4:ui32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:ui32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:ui32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xlw4_testu32a
+
+! CHECK-LABEL: @vec_xlw4_testf32a
+! Test: vec_xlw4 with an integer(2) byte offset into a vector(real(4))
+! array (rank-1 here, exercising a different array shape than the integer
+! tests). Expected lowering: byte-offset GEP, llvm.ppc.vsx.lxvw4x call
+! returning <4 x i32>, then a bitcast to the <4 x float> destination type.
+subroutine vec_xlw4_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  vector(real(4)) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xlw4(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<4:f32>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: %[[res:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[res]], ptr %2, align 16
+end subroutine vec_xlw4_testf32a


        


More information about the flang-commits mailing list