[flang-commits] [flang] 2de024e - [flang] Add PowerPC vec_lxv, vec_lvsl, vec_lvsr, vec_xl, vec_xl_be and vec_xlds intrinsic

Fri Aug 18 12:02:32 PDT 2023

Author: Kelvin Li
Date: 2023-08-18T15:02:15-04:00
New Revision: 2de024ef311d082c6a435d638efb99b923cca2d8

URL: https://github.com/llvm/llvm-project/commit/2de024ef311d082c6a435d638efb99b923cca2d8
DIFF: https://github.com/llvm/llvm-project/commit/2de024ef311d082c6a435d638efb99b923cca2d8.diff

LOG: [flang] Add PowerPC vec_lxv, vec_lvsl, vec_lvsr, vec_xl, vec_xl_be and vec_xlds intrinsic

Differential Revision: https://reviews.llvm.org/D157920

Added: 
    

Modified: 
    flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
    flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
    flang/module/__ppc_intrinsics.f90
    flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
    flang/test/Lower/PowerPC/ppc-vec-load.f90

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
index 68ae78abf9dc97..1e87bf0f6ad159 100644

--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -32,6 +32,9 @@ enum class VecOp {
   Ld,
   Lde,
   Ldl,
+  Lvsl,
+  Lvsr,
+  Lxv,
   Lxvp,
   Mergeh,
   Mergel,
@@ -57,6 +60,8 @@ enum class VecOp {
   Stxv,
   Stxvp,
   Sub,
+  Xl,
+  Xlbe,
   Xld2,
   Xlw4,
   Xor,
@@ -275,10 +280,21 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
   fir::ExtendedValue genVecPerm(mlir::Type resultType,
                                 llvm::ArrayRef<fir::ExtendedValue> args);
 
+  fir::ExtendedValue genVecXlGrp(mlir::Type resultType,
+                                 llvm::ArrayRef<fir::ExtendedValue> args);
+
   template <VecOp>
   fir::ExtendedValue genVecLdCallGrp(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args);
 
+  template <VecOp>
+  fir::ExtendedValue genVecLdNoCallGrp(mlir::Type resultType,
+                                       llvm::ArrayRef<fir::ExtendedValue> args);
+
+  template <VecOp>
+  fir::ExtendedValue genVecLvsGrp(mlir::Type resultType,
+                                  llvm::ArrayRef<fir::ExtendedValue> args);
+
   template <VecOp>
   fir::ExtendedValue genVecNmaddMsub(mlir::Type resultType,
                                      llvm::ArrayRef<fir::ExtendedValue> args);
@@ -299,6 +315,9 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
   template <VecOp vop>
   fir::ExtendedValue genVecSplat(mlir::Type resultType,
                                  llvm::ArrayRef<fir::ExtendedValue> args);
+
+  fir::ExtendedValue genVecXlds(mlir::Type resultType,
+                                llvm::ArrayRef<fir::ExtendedValue> args);
 };
 
 const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name);

diff  --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
index 335812909161d4..ab0d5079d8afe0 100644
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -589,6 +589,21 @@ static constexpr IntrinsicHandler ppcHandlers[]{
          &PI::genVecLdCallGrp<VecOp::Ldl>),
      {{{"arg1", asValue}, {"arg2", asAddr}}},
      /*isElemental=*/false},
+    {"__ppc_vec_lvsl",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLvsGrp<VecOp::Lvsl>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lvsr",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLvsGrp<VecOp::Lvsr>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_lxv",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdNoCallGrp<VecOp::Lxv>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
     {"__ppc_vec_lxvp",
      static_cast<IntrinsicLibrary::ExtendedGenerator>(
          &PI::genVecLdCallGrp<VecOp::Lxvp>),
@@ -713,11 +728,24 @@ static constexpr IntrinsicHandler ppcHandlers[]{
          &PI::genVecAddAndMulSubXor<VecOp::Sub>),
      {{{"arg1", asValue}, {"arg2", asValue}}},
      /*isElemental=*/true},
+    {"__ppc_vec_xl",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
+    {"__ppc_vec_xl_be",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(
+         &PI::genVecLdNoCallGrp<VecOp::Xlbe>),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
     {"__ppc_vec_xld2_",
      static_cast<IntrinsicLibrary::ExtendedGenerator>(
          &PI::genVecLdCallGrp<VecOp::Xld2>),
      {{{"arg1", asValue}, {"arg2", asAddr}}},
      /*isElemental=*/false},
+    {"__ppc_vec_xlds",
+     static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds),
+     {{{"arg1", asValue}, {"arg2", asAddr}}},
+     /*isElemental=*/false},
     {"__ppc_vec_xlw4_",
      static_cast<IntrinsicLibrary::ExtendedGenerator>(
          &PI::genVecLdCallGrp<VecOp::Xlw4>),
@@ -1797,6 +1825,62 @@ static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
   return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
 }
 
+// Build the named attribute `alignment` whose value is `val` stored as a
+// 64-bit integer attribute (used below to tag fir.load operations).
+static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
+                                             const int val) {
+  mlir::MLIRContext *ctx{builder.getContext()};
+  return builder.getNamedAttr(
+      "alignment",
+      mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 64), val));
+}
+
+// VEC_XL
+// Dispatch on the element bit width of the result vector type:
+//   - 8- and 16-bit elements go through genVecLdNoCallGrp<VecOp::Xl>, which
+//     emits a direct load rather than an intrinsic call;
+//   - 32- and 64-bit elements reuse the VEC_XLW4 / VEC_XLD2 paths of
+//     genVecLdCallGrp.
+// Any other width is a lowering bug and hits llvm_unreachable.
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType,
+                                 llvm::ArrayRef<fir::ExtendedValue> args) {
+  VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)};
+  switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) {
+  case 8:  // vec_xlb1
+  case 16: // vec_xlh8
+    return genVecLdNoCallGrp<VecOp::Xl>(resultType, args);
+  case 32: // vec_xlw4
+    return genVecLdCallGrp<VecOp::Xlw4>(resultType, args);
+  case 64: // vec_xld2
+    return genVecLdCallGrp<VecOp::Xld2>(resultType, args);
+  default:
+    // Every path out of the switch returns or is unreachable, so no trailing
+    // llvm_unreachable is needed after it.
+    llvm_unreachable("invalid kind");
+  }
+}
+
+// VEC_LXV, VEC_XL, VEC_XL_BE
+// Lower to a direct fir.load (no intrinsic call) from the address in args[1]
+// displaced by the value in args[0]. The loaded vector is element-reversed
+// for VEC_XL when isBEVecElemOrderOnLE() holds, and for VEC_XL_BE when the
+// target triple is little endian.
+template <VecOp vop>
+fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp(
+    mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto offsetVal{getBase(args[0])};
+  auto baseAddr{getBase(args[1])};
+
+  // MLIR and FIR flavours of the result vector type.
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // Effective address: %addr of args[1] advanced by %val of args[0].
+  auto addr{addOffsetToAddress(builder, loc, baseAddr, offsetVal)};
+
+  // The load is tagged with an explicit alignment of 1.
+  auto loaded{builder.create<fir::LoadOp>(loc, mlirTy, addr,
+                                          getAlignmentAttr(builder, 1))};
+
+  const auto triple{fir::getTargetTriple(builder.getModule())};
+  const bool reverseElems{(vop == VecOp::Xl && isBEVecElemOrderOnLE()) ||
+                          (vop == VecOp::Xlbe && triple.isLittleEndian())};
+  if (!reverseElems)
+    return builder.createConvert(loc, firTy, loaded);
+
+  return builder.createConvert(
+      loc, firTy, reverseVectorElements(builder, loc, loaded, vecTyInfo.len));
+}
+
 // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4
 template <VecOp vop>
 fir::ExtendedValue
@@ -1897,6 +1981,58 @@ PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType,
   return builder.createConvert(loc, firTy, result);
 }
 
+// VEC_LVSL, VEC_LVSR
+// Lower to a call of llvm.ppc.altivec.lvsl / llvm.ppc.altivec.lvsr. The
+// call operand is the address in args[1] displaced by the offset in args[0]
+// after that offset has been reduced to its sign-extended low 8 bits. The
+// call result is element-reversed when isNativeVecElemOrderOnLE() holds.
+template <VecOp vop>
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType,
+                                  llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto *ctx{builder.getContext()};
+  auto offsetArg{getBase(args[0])};
+  auto baseAddr{getBase(args[1])};
+
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(ctx)};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // The shifts below operate on i64, so widen the offset first if necessary.
+  auto i64Ty{mlir::IntegerType::get(ctx, 64)};
+  if (offsetArg.getType() != i64Ty)
+    offsetArg = builder.create<fir::ConvertOp>(loc, i64Ty, offsetArg);
+
+  // Shift left by 56 bits, then arithmetic-shift right by 56 bits: the upper
+  // 56 bits of the offset are discarded and the remaining low byte is
+  // sign-extended back to 64 bits.
+  auto shiftAmt{builder.createIntegerConstant(loc, i64Ty, 56)};
+  auto shiftedUp{
+      builder.create<mlir::arith::ShLIOp>(loc, offsetArg, shiftAmt)};
+  auto wrappedOffset{
+      builder.create<mlir::arith::ShRSIOp>(loc, shiftedUp, shiftAmt)};
+
+  // Displace %addr of args[1] by the wrapped offset.
+  auto addr{addOffsetToAddress(builder, loc, baseAddr, wrappedOffset)};
+
+  // Select the LLVM intrinsic that matches the requested operation.
+  llvm::StringRef fname{};
+  if (vop == VecOp::Lvsl)
+    fname = "llvm.ppc.altivec.lvsl";
+  else if (vop == VecOp::Lvsr)
+    fname = "llvm.ppc.altivec.lvsr";
+  else
+    llvm_unreachable("invalid vector operation for generator");
+
+  // Declare the intrinsic as (address type) -> result vector and call it.
+  auto funcType{mlir::FunctionType::get(ctx, {addr.getType()}, {mlirTy})};
+  auto funcOp{builder.addNamedFunction(loc, fname, funcType)};
+  llvm::SmallVector<mlir::Value, 4> callArgs{addr};
+  auto result{builder.create<fir::CallOp>(loc, funcOp, callArgs).getResult(0)};
+
+  if (!isNativeVecElemOrderOnLE())
+    return builder.createConvert(loc, firTy, result);
+
+  return builder.createConvert(
+      loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len));
+}
+
 // VEC_NMADD, VEC_MSUB
 template <VecOp vop>
 fir::ExtendedValue
@@ -2281,6 +2417,38 @@ PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType,
   return builder.createConvert(loc, retTy, splatOp);
 }
 
+// VEC_XLDS
+// Load one i64 from the address in args[1] displaced by the value in
+// args[0], splat it into a two-element i64 vector, and bitcast that vector
+// to the result's MLIR vector type when the two types differ.
+fir::ExtendedValue
+PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType,
+                                llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 2);
+  auto offsetVal{getBase(args[0])};
+  auto baseAddr{getBase(args[1])};
+
+  // MLIR and FIR flavours of the result vector type.
+  auto *ctx{builder.getContext()};
+  auto vecTyInfo{getVecTypeFromFirType(resultType)};
+  auto mlirTy{vecTyInfo.toMlirVectorType(ctx)};
+  auto firTy{vecTyInfo.toFirVectorType()};
+
+  // Effective address: %addr of args[1] advanced by %val of args[0].
+  auto addr{addOffsetToAddress(builder, loc, baseAddr, offsetVal)};
+
+  // View the address as a reference to i64 and load a single element.
+  auto i64Ty{mlir::IntegerType::get(ctx, 64)};
+  auto i64VecTy{mlir::VectorType::get(2, i64Ty)};
+  auto i64Ref{
+      builder.create<fir::ConvertOp>(loc, builder.getRefType(i64Ty), addr)};
+  auto elem{builder.create<fir::LoadOp>(loc, i64Ref)};
+
+  // Replicate the element into both lanes of vector<2xi64>.
+  auto splat{builder.create<mlir::vector::SplatOp>(loc, elem, i64VecTy)};
+
+  // A bitcast is only emitted when the result type is not vector<2xi64>.
+  if (mlirTy != splat.getType()) {
+    auto cast{builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splat)};
+    return builder.createConvert(loc, firTy, cast);
+  }
+
+  return builder.createConvert(loc, firTy, splat);
+}
+
 const char *getMmaIrIntrName(MMAOp mmaOp) {
   switch (mmaOp) {
   case MMAOp::AssembleAcc:
@@ -2755,13 +2923,6 @@ void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
   builder.create<fir::CallOp>(loc, funcOp, biArgs);
 }
 
-static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
-                                             const int val) {
-  auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
-  auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
-  return builder.getNamedAttr("alignment", alignAttr);
-}
-
 // VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
 template <VecOp vop>
 void PPCIntrinsicLibrary::genVecXStore(

diff  --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90
index 4666f7fb91c396..b3ff33b0619ffa 100644
--- a/flang/module/__ppc_intrinsics.f90
+++ b/flang/module/__ppc_intrinsics.f90
@@ -238,6 +238,24 @@ pure vector(real(VKIND)) function func_vr##VKIND##i0vr##VKIND(arg1, arg2); \
     !dir$ ignore_tkr(r) arg2; \
   end function ;
 
+! vector(u(1)) function f(i, i)
+#define FUNC_VU1I0I(KIND) \
+  vector(unsigned(1)) function func_vu1i0i##KIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    integer(KIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
+! vector(u(1)) function f(i, r)
+#define FUNC_VU1I0R(KIND) \
+  vector(unsigned(1)) function func_vu1i0r##KIND(arg1, arg2); \
+    integer(8), intent(in) :: arg1; \
+    !dir$ ignore_tkr(k) arg1; \
+    real(KIND), intent(in) :: arg2; \
+    !dir$ ignore_tkr(r) arg2; \
+  end function ;
+
 ! __vector_pair function f(i, vector(i))
 #define FUNC_VPI0VI(VKIND) \
   pure __vector_pair function func_vpi0vi##VKIND(arg1, arg2); \
@@ -339,6 +357,8 @@ pure vector(real(VKIND)) function func_vec_convert_vr##VKIND##vi##vr##VKIND(v, m
   FUNC_VPI0VU(1) FUNC_VPI0VU(2) FUNC_VPI0VU(4) FUNC_VPI0VU(8)
   FUNC_VPI0VR(4) FUNC_VPI0VR(8)
   FUNC_VPI0VP
+  FUNC_VU1I0I(1) FUNC_VU1I0I(2) FUNC_VU1I0I(4)
+  FUNC_VU1I0R(4)
 
 #undef FUNC_VEC_CONVERT_VRVIVR
 #undef FUNC_VEC_CONVERT_VUVIVU
@@ -347,6 +367,8 @@ pure vector(real(VKIND)) function func_vec_convert_vr##VKIND##vi##vr##VKIND(v, m
 #undef FUNC_VPI0VR
 #undef FUNC_VPI0VU
 #undef FUNC_VPI0VI
+#undef FUNC_VU1I0R
+#undef FUNC_VU1I0I
 #undef FUNC_VRI0VR
 #undef FUNC_VUI0VU
 #undef FUNC_VII0VI
@@ -1154,6 +1176,40 @@ end function func_r8r8i
 #undef VU_VI_VI
 #undef VI_VI_VI
 
+!-------------------------------------------------------
+! vector(unsigned(1)) function(integer, i/r)
+!-------------------------------------------------------
+#define VU1_I0_I(NAME, KIND) __ppc_##NAME##_vu1i0i##KIND
+#define VU1_I0_R(NAME, KIND) __ppc_##NAME##_vu1i0r##KIND
+
+#define VEC_VU1_I0_I(NAME, KIND) \
+  procedure(func_vu1i0i##KIND) :: VU1_I0_I(NAME, KIND);
+#define VEC_VU1_I0_R(NAME, KIND) \
+  procedure(func_vu1i0r##KIND) :: VU1_I0_R(NAME, KIND);
+
+! vec_lvsl
+  VEC_VU1_I0_I(vec_lvsl,1) VEC_VU1_I0_I(vec_lvsl,2) VEC_VU1_I0_I(vec_lvsl,4)
+  VEC_VU1_I0_R(vec_lvsl,4)
+  interface vec_lvsl
+    procedure :: VU1_I0_I(vec_lvsl,1), VU1_I0_I(vec_lvsl,2), VU1_I0_I(vec_lvsl,4)
+    procedure :: VU1_I0_R(vec_lvsl,4)
+  end interface
+  public :: vec_lvsl
+
+! vec_lvsr
+  VEC_VU1_I0_I(vec_lvsr,1) VEC_VU1_I0_I(vec_lvsr,2) VEC_VU1_I0_I(vec_lvsr,4)
+  VEC_VU1_I0_R(vec_lvsr,4)
+  interface vec_lvsr
+    procedure :: VU1_I0_I(vec_lvsr,1), VU1_I0_I(vec_lvsr,2), VU1_I0_I(vec_lvsr,4)
+    procedure :: VU1_I0_R(vec_lvsr,4)
+  end interface
+  public :: vec_lvsr
+
+#undef VEC_VU1_I0_R
+#undef VEC_VU1_I0_I
+#undef VU1_I0_R
+#undef VU1_I0_I
+
 !-------------------------------------------------------
 ! vector function(integer, i/u/r/vector)
 !-------------------------------------------------------
@@ -1214,6 +1270,51 @@ end function func_r8r8i
   end interface
   public :: vec_ldl
 
+! vec_lxv
+  VEC_VI_I0_VI(vec_lxv,1) VEC_VI_I0_VI(vec_lxv,2) VEC_VI_I0_VI(vec_lxv,4) VEC_VI_I0_VI(vec_lxv,8)
+  VEC_VU_I0_VU(vec_lxv,1) VEC_VU_I0_VU(vec_lxv,2) VEC_VU_I0_VU(vec_lxv,4) VEC_VU_I0_VU(vec_lxv,8)
+  VEC_VR_I0_VR(vec_lxv,4) VEC_VR_I0_VR(vec_lxv,8)
+  VEC_VI_I0_I(vec_lxv,1) VEC_VI_I0_I(vec_lxv,2) VEC_VI_I0_I(vec_lxv,4) VEC_VI_I0_I(vec_lxv,8)
+  VEC_VR_I0_R(vec_lxv,4) VEC_VR_I0_R(vec_lxv,8)
+  interface vec_lxv
+    procedure :: VI_I0_VI(vec_lxv,1), VI_I0_VI(vec_lxv,2), VI_I0_VI(vec_lxv,4), VI_I0_VI(vec_lxv,8)
+    procedure :: VU_I0_VU(vec_lxv,1), VU_I0_VU(vec_lxv,2), VU_I0_VU(vec_lxv,4), VU_I0_VU(vec_lxv,8)
+    procedure :: VR_I0_VR(vec_lxv,4), VR_I0_VR(vec_lxv,8)
+    procedure :: VI_I0_I(vec_lxv,1), VI_I0_I(vec_lxv,2), VI_I0_I(vec_lxv,4), VI_I0_I(vec_lxv,8)
+    procedure :: VR_I0_R(vec_lxv,4), VR_I0_R(vec_lxv,8)
+  end interface
+  public :: vec_lxv
+
+! vec_xl
+  VEC_VI_I0_VI(vec_xl,1) VEC_VI_I0_VI(vec_xl,2) VEC_VI_I0_VI(vec_xl,4) VEC_VI_I0_VI(vec_xl,8)
+  VEC_VU_I0_VU(vec_xl,1) VEC_VU_I0_VU(vec_xl,2) VEC_VU_I0_VU(vec_xl,4) VEC_VU_I0_VU(vec_xl,8)
+  VEC_VR_I0_VR(vec_xl,4) VEC_VR_I0_VR(vec_xl,8)
+  VEC_VI_I0_I(vec_xl,1) VEC_VI_I0_I(vec_xl,2) VEC_VI_I0_I(vec_xl,4) VEC_VI_I0_I(vec_xl,8)
+  VEC_VR_I0_R(vec_xl,4) VEC_VR_I0_R(vec_xl,8)
+  interface vec_xl
+    procedure :: VI_I0_VI(vec_xl,1), VI_I0_VI(vec_xl,2), VI_I0_VI(vec_xl,4), VI_I0_VI(vec_xl,8)
+    procedure :: VU_I0_VU(vec_xl,1), VU_I0_VU(vec_xl,2), VU_I0_VU(vec_xl,4), VU_I0_VU(vec_xl,8)
+    procedure :: VR_I0_VR(vec_xl,4), VR_I0_VR(vec_xl,8)
+    procedure :: VI_I0_I(vec_xl,1), VI_I0_I(vec_xl,2), VI_I0_I(vec_xl,4), VI_I0_I(vec_xl,8)
+    procedure :: VR_I0_R(vec_xl,4), VR_I0_R(vec_xl,8)
+  end interface
+  public :: vec_xl
+
+! vec_xl_be
+  VEC_VI_I0_VI(vec_xl_be,1) VEC_VI_I0_VI(vec_xl_be,2) VEC_VI_I0_VI(vec_xl_be,4) VEC_VI_I0_VI(vec_xl_be,8)
+  VEC_VU_I0_VU(vec_xl_be,1) VEC_VU_I0_VU(vec_xl_be,2) VEC_VU_I0_VU(vec_xl_be,4) VEC_VU_I0_VU(vec_xl_be,8)
+  VEC_VR_I0_VR(vec_xl_be,4) VEC_VR_I0_VR(vec_xl_be,8)
+  VEC_VI_I0_I(vec_xl_be,1) VEC_VI_I0_I(vec_xl_be,2) VEC_VI_I0_I(vec_xl_be,4) VEC_VI_I0_I(vec_xl_be,8)
+  VEC_VR_I0_R(vec_xl_be,4) VEC_VR_I0_R(vec_xl_be,8)
+  interface vec_xl_be
+    procedure :: VI_I0_VI(vec_xl_be,1), VI_I0_VI(vec_xl_be,2), VI_I0_VI(vec_xl_be,4), VI_I0_VI(vec_xl_be,8)
+    procedure :: VU_I0_VU(vec_xl_be,1), VU_I0_VU(vec_xl_be,2), VU_I0_VU(vec_xl_be,4), VU_I0_VU(vec_xl_be,8)
+    procedure :: VR_I0_VR(vec_xl_be,4), VR_I0_VR(vec_xl_be,8)
+    procedure :: VI_I0_I(vec_xl_be,1), VI_I0_I(vec_xl_be,2), VI_I0_I(vec_xl_be,4) , VI_I0_I(vec_xl_be,8)
+    procedure :: VR_I0_R(vec_xl_be,4), VR_I0_R(vec_xl_be,8)
+  end interface
+  public :: vec_xl_be
+
 ! vec_xld2
   VEC_VI_I0_VI(vec_xld2_,1) VEC_VI_I0_VI(vec_xld2_,2) VEC_VI_I0_VI(vec_xld2_,4) VEC_VI_I0_VI(vec_xld2_,8)
   VEC_VU_I0_VU(vec_xld2_,1) VEC_VU_I0_VU(vec_xld2_,2) VEC_VU_I0_VU(vec_xld2_,4) VEC_VU_I0_VU(vec_xld2_,8)
@@ -1229,6 +1330,21 @@ end function func_r8r8i
   end interface
   public :: vec_xld2
 
+! vec_xlds
+  VEC_VI_I0_VI(vec_xlds,8)
+  VEC_VU_I0_VU(vec_xlds,8)
+  VEC_VR_I0_VR(vec_xlds,8)
+  VEC_VI_I0_I(vec_xlds,8)
+  VEC_VR_I0_R(vec_xlds,8)
+  interface vec_xlds
+    procedure :: VI_I0_VI(vec_xlds,8)
+    procedure :: VU_I0_VU(vec_xlds,8)
+    procedure :: VR_I0_VR(vec_xlds,8)
+    procedure :: VI_I0_I(vec_xlds,8)
+    procedure :: VR_I0_R(vec_xlds,8)
+  end interface
+  public :: vec_xlds
+
 ! vec_xlw4
   VEC_VI_I0_VI(vec_xlw4_,1) VEC_VI_I0_VI(vec_xlw4_,2)
   VEC_VU_I0_VU(vec_xlw4_,1) VEC_VU_I0_VU(vec_xlw4_,2) VEC_VU_I0_VU(vec_xlw4_,4)

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
index b88dfb5b362f3c..f4e7f7b1db41dc 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load-elem-order.f90
@@ -297,6 +297,606 @@ subroutine vec_lde_testf32a(arg1, arg2, res)
 ! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
 end subroutine vec_lde_testf32a
 
+!-------------------
+! vec_lvsl
+!-------------------
+
+! CHECK-LABEL: @vec_lvsl_testi8s
+subroutine vec_lvsl_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi8s
+
+! CHECK-LABEL: @vec_lvsl_testi16a
+subroutine vec_lvsl_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi16a
+
+! CHECK-LABEL: @vec_lvsl_testi32a
+subroutine vec_lvsl_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(11, 3, 4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<11x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testi32a
+
+! CHECK-LABEL: @vec_lvsl_testf32a
+subroutine vec_lvsl_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(51)
+  vector(unsigned(1)) :: res
+  res = vec_lvsl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<51xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsl_testf32a
+
+!-------------------
+! vec_lvsr
+!-------------------
+
+! CHECK-LABEL: @vec_lvsr_testi8s
+subroutine vec_lvsr_testi8s(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[iext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi8s
+
+! CHECK-LABEL: @vec_lvsr_testi16a
+subroutine vec_lvsr_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(41)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<41xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[iext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi16a
+
+! CHECK-LABEL: @vec_lvsr_testi32a
+subroutine vec_lvsr_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(23, 31, 47)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1i64:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1i64]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<23x31x47xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[iext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[iext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testi32a
+
+! CHECK-LABEL: @vec_lvsr_testf32a
+subroutine vec_lvsr_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[fiveSix:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[fiveSix]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[fiveSix]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lvsr_testf32a
+
+!-------------------
+! vec_lxv
+!-------------------
+
+! CHECK-LABEL: @vec_lxv_testi8a
+subroutine vec_lxv_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[offset:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: store <16 x i8> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi8a
+
+! CHECK-LABEL: @vec_lxv_testi16a
+subroutine vec_lxv_testi16a(arg1, arg2, res)  ! vec_lxv: integer(2) offset, rank-3 integer(2) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(2) :: arg2(2, 4, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(2)) :: res  ! receives the loaded vector
+  res = vec_lxv(arg1, arg2)  ! expected lowering: a plain vector load with alignment 1, no shuffle
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: store <8 x i16> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi16a
+
+! CHECK-LABEL: @vec_lxv_testi32a
+subroutine vec_lxv_testi32a(arg1, arg2, res)  ! vec_lxv: integer(4) offset, rank-3 integer(4) array source
+  integer(4) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(4) :: arg2(2, 4, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(4)) :: res  ! receives the loaded vector
+  res = vec_lxv(arg1, arg2)  ! expected lowering: a plain vector load with alignment 1, no shuffle
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[offset:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR: store <4 x i32> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testi32a
+
+! CHECK-LABEL: @vec_lxv_testf32a
+subroutine vec_lxv_testf32a(arg1, arg2, res)  ! vec_lxv: integer(2) offset, real(4) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(4) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(4)) :: res  ! receives the loaded vector
+  res = vec_lxv(arg1, arg2)  ! expected lowering: a plain vector load with alignment 1, no shuffle
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[offset:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR: store <4 x float> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testf32a
+
+! CHECK-LABEL: @vec_lxv_testf64a
+subroutine vec_lxv_testf64a(arg1, arg2, res)  ! vec_lxv: integer(8) offset, real(8) array source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(8) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(8)) :: res  ! receives the loaded vector
+  res = vec_lxv(arg1, arg2)  ! expected lowering: a plain vector load with alignment 1, no shuffle
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[offset:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[offset]]
+! LLVMIR: %[[res:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR: store <2 x double> %[[res]], ptr %2, align 16
+end subroutine vec_lxv_testf64a
+
+!-------------------
+! vec_xl
+!-------------------
+
+! CHECK-LABEL: @vec_xl_testi8a
+subroutine vec_xl_testi8a(arg1, arg2, res)  ! vec_xl: integer(1) offset, scalar integer(1) source
+  integer(1) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(1) :: arg2  ! scalar source; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(1)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+  
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_testi8a
+
+! CHECK-LABEL: @vec_xl_testi16a
+subroutine vec_xl_testi16a(arg1, arg2, res)  ! vec_xl: integer(2) offset, rank-2 integer(2) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(2) :: arg2(2, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(2)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 8 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_testi16a
+
+! CHECK-LABEL: @vec_xl_testi32a
+subroutine vec_xl_testi32a(arg1, arg2, res)  ! vec_xl: integer(4) offset, rank-3 integer(4) array source
+  integer(4) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(4) :: arg2(2, 4, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(4)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: a call to llvm.ppc.vsx.lxvw4x.be on the offset address, no bitcast
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi32a
+
+! CHECK-LABEL: @vec_xl_testi64a
+subroutine vec_xl_testi64a(arg1, arg2, res)  ! vec_xl: integer(8) offset, rank-3 integer(8) array source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(8) :: arg2(2, 4, 1)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(8)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: a call to llvm.ppc.vsx.lxvd2x.be, then a bitcast from 2 x f64 to 2 x i64
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x1xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testi64a
+
+! CHECK-LABEL: @vec_xl_testf32a
+subroutine vec_xl_testf32a(arg1, arg2, res)  ! vec_xl: integer(2) offset, real(4) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(4) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(4)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: a call to llvm.ppc.vsx.lxvw4x.be, then a bitcast from 4 x i32 to 4 x f32
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x.be(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testf32a
+
+! CHECK-LABEL: @vec_xl_testf64a
+subroutine vec_xl_testf64a(arg1, arg2, res)  ! vec_xl: integer(8) offset, real(8) array source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(8) :: arg2(2)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(8)) :: res  ! receives the loaded vector
+  res = vec_xl(arg1, arg2)  ! expected lowering: a call to llvm.ppc.vsx.lxvd2x.be on the offset address, no bitcast
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x.be(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x.be(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testf64a
+
+!-------------------
+! vec_xl_be
+!-------------------
+
+! CHECK-LABEL: @vec_xl_be_testi8a
+subroutine vec_xl_be_testi8a(arg1, arg2, res)  ! vec_xl_be: integer(1) offset, rank-3 integer(1) array source
+  integer(1) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(1) :: arg2(2, 4, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(1)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi8a
+
+! CHECK-LABEL: @vec_xl_be_testi16a
+subroutine vec_xl_be_testi16a(arg1, arg2, res)  ! vec_xl_be: integer(2) offset, rank-2 integer(2) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(2) :: arg2(8,2)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(2)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 8 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<8x2xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi16a
+
+! CHECK-LABEL: @vec_xl_be_testi32a
+subroutine vec_xl_be_testi32a(arg1, arg2, res)  ! vec_xl_be: integer(4) offset, rank-2 integer(4) array source
+  integer(4) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(4) :: arg2(2, 4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(4)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 4 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi32a
+
+! CHECK-LABEL: @vec_xl_be_testi64a
+subroutine vec_xl_be_testi64a(arg1, arg2, res)  ! vec_xl_be: integer(8) offset, rank-3 integer(8) array source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  integer(8) :: arg2(2, 4, 8)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(8)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle swapping the 2 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<2xi64>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xi64>, vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x i64>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[ld]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x i64> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testi64a
+
+! CHECK-LABEL: @vec_xl_be_testf32a
+subroutine vec_xl_be_testf32a(arg1, arg2, res)  ! vec_xl_be: integer(2) offset, real(4) array source
+  integer(2) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(4) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(4)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle reversing all 4 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <4 x float> %[[ld]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testf32a
+
+! CHECK-LABEL: @vec_xl_be_testf64a
+subroutine vec_xl_be_testf64a(arg1, arg2, res)  ! vec_xl_be: integer(8) offset, real(8) array source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  real(8) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(8)) :: res  ! receives the loaded vector
+  res = vec_xl_be(arg1, arg2)  ! expected lowering: vector load with alignment 1, then a shuffle swapping the 2 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[ref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[ref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref2:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[undefv:.*]] = fir.undefined vector<2xf64>
+! FIR: %[[shflv:.*]] = vector.shuffle %[[ref2]], %[[undefv]] [1, 0] : vector<2xf64>, vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[shflv]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x double> %[[ld]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x double> %[[shflv]], ptr %2, align 16
+end subroutine vec_xl_be_testf64a
+
 !-------------------
 ! vec_xld2
 !-------------------
@@ -520,3 +1120,57 @@ subroutine vec_xlw4_testf32a(arg1, arg2, res)
 ! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
 ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
 end subroutine vec_xlw4_testf32a
+
+!-------------------
+! vec_xlds
+!-------------------
+
+! CHECK-LABEL: @vec_xlds_testi64a
+subroutine vec_xlds_testi64a(arg1, arg2, res)  ! vec_xlds: integer(8) offset, array of vector(integer(8)) source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  vector(integer(8)) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(integer(8)) :: res  ! receives the splatted vector
+  res = vec_xlds(arg1, arg2)  ! expected lowering: one i64 loaded at the offset address and splatted to both elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: store <2 x i64> %[[shflv]], ptr %2, align 16
+end subroutine vec_xlds_testi64a
+
+! CHECK-LABEL: @vec_xlds_testf64a
+subroutine vec_xlds_testf64a(arg1, arg2, res)  ! vec_xlds: integer(8) offset, array of vector(real(8)) source
+  integer(8) :: arg1  ! offset; the checks below expect it as the coordinate_of / getelementptr index
+  vector(real(8)) :: arg2(4)  ! source storage; its reference is expected to be converted to a ?xi8 array reference
+  vector(real(8)) :: res  ! receives the splatted vector
+  res = vec_xlds(arg1, arg2)  ! expected lowering: one i64 loaded and splatted, then the vector bitcast to 2 x f64
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[aryref:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[aryref]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[val:.*]] = fir.load %[[ref]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[val]] : vector<2xi64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shflv:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: %[[bc:.*]] = bitcast <2 x i64> %[[shflv]] to <2 x double>
+! LLVMIR: store <2 x double> %[[bc]], ptr %2, align 16
+end subroutine vec_xlds_testf64a

diff  --git a/flang/test/Lower/PowerPC/ppc-vec-load.f90 b/flang/test/Lower/PowerPC/ppc-vec-load.f90
index ed08f60789a0ac..1af8cd39c506b7 100644
--- a/flang/test/Lower/PowerPC/ppc-vec-load.f90
+++ b/flang/test/Lower/PowerPC/ppc-vec-load.f90
@@ -430,6 +430,354 @@ subroutine vec_ldl_testi32s(arg1, arg2, res)
 ! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
 end subroutine vec_ldl_testi32s
 
+!----------------------
+! vec_lvsl
+!----------------------
+
+! CHECK-LABEL: @vec_lvsl_testi8s
+subroutine vec_lvsl_testi8s(arg1, arg2, res)  ! vec_lvsl: integer(1) offset, scalar integer(1) address operand
+  integer(1) :: arg1  ! offset; expected to be converted to i64, then shifted left and arithmetically right by 56 before indexing
+  integer(1) :: arg2  ! scalar address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsl(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsl on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi8s
+
+! CHECK-LABEL: @vec_lvsl_testi16a
+subroutine vec_lvsl_testi16a(arg1, arg2, res)  ! vec_lvsl: integer(2) offset, integer(2) array address operand
+  integer(2) :: arg1  ! offset; expected to be converted to i64, then shifted left and arithmetically right by 56 before indexing
+  integer(2) :: arg2(4)  ! address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsl(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsl on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi16a
+
+! CHECK-LABEL: @vec_lvsl_testi32a
+subroutine vec_lvsl_testi32a(arg1, arg2, res)  ! vec_lvsl: integer(4) offset, rank-3 integer(4) array address operand
+  integer(4) :: arg1  ! offset; expected to be converted to i64, then shifted left and arithmetically right by 56 before indexing
+  integer(4) :: arg2(2, 3, 4)  ! address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsl(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsl on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testi32a
+
+! CHECK-LABEL: @vec_lvsl_testf32a
+subroutine vec_lvsl_testf32a(arg1, arg2, res)  ! vec_lvsl: integer(8) offset, real(4) array address operand
+  integer(8) :: arg1  ! offset; no widening conversion is expected, only the shift left and arithmetic shift right by 56
+  real(4) :: arg2(4)  ! address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsl(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsl on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsl(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsl(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsl_testf32a
+
+!----------------------
+! vec_lvsr
+!----------------------
+
+! CHECK-LABEL: @vec_lvsr_testi8s
+subroutine vec_lvsr_testi8s(arg1, arg2, res)  ! vec_lvsr: integer(1) offset, scalar integer(1) address operand
+  integer(1) :: arg1  ! offset; expected to be converted to i64, then shifted left and arithmetically right by 56 before indexing
+  integer(1) :: arg2  ! scalar address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsr(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsr on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i8) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[ext:.*]] = sext i8 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi8s
+
+! CHECK-LABEL: @vec_lvsr_testi16a
+subroutine vec_lvsr_testi16a(arg1, arg2, res)  ! vec_lvsr: integer(2) offset, integer(2) array address operand
+  integer(2) :: arg1  ! offset; expected to be converted to i64, then shifted left and arithmetically right by 56 before indexing
+  integer(2) :: arg2(4)  ! address operand; its reference is expected to be converted to a ?xi8 array reference
+  vector(unsigned(1)) :: res  ! receives the 16-element unsigned byte result
+  res = vec_lvsr(arg1, arg2)  ! expected lowering: llvm.ppc.altivec.lvsr on the offset address, then a shuffle reversing all 16 elements
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i16) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[ext:.*]] = sext i16 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi16a
+
+! CHECK-LABEL: @vec_lvsr_testi32a
+subroutine vec_lvsr_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 3, 4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg1ext:.*]] = fir.convert %[[arg1]] : (i32) -> i64
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1ext]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x3x4xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[ext:.*]] = sext i32 %[[arg1]] to i64
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[ext]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[ld:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[addr:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[ld]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[addr]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testi32a
+
+! CHECK-LABEL: @vec_lvsr_testf32a
+subroutine vec_lvsr_testf32a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(4) :: arg2(4)
+  vector(unsigned(1)) :: res
+  res = vec_lvsr(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[c56:.*]] = arith.constant 56 : i64
+! FIR: %[[lshft:.*]] = arith.shli %[[arg1]], %[[c56]] : i64
+! FIR: %[[rshft:.*]] = arith.shrsi %[[lshft]], %[[c56]] : i64
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[rshft]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.altivec.lvsr(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<16xi8>
+! FIR: %[[vundef:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[sv:.*]] = vector.shuffle %[[ld]], %[[vundef]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[sv]] : (vector<16xi8>) -> !fir.vector<16:ui8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:ui8>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[lshft:.*]] = shl i64 %[[arg1]], 56
+! LLVMIR: %[[rshft:.*]] = ashr i64 %[[lshft]], 56
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[rshft]]
+! LLVMIR: %[[ld:.*]] = call <16 x i8> @llvm.ppc.altivec.lvsr(ptr %[[addr]])
+! LLVMIR: %[[sv:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[sv]], ptr %2, align 16
+end subroutine vec_lvsr_testf32a
+
+!----------------------
+! vec_lxv
+!----------------------
+
+! CHECK-LABEL: @vec_lxv_testi8a
+subroutine vec_lxv_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi8a
+
+! CHECK-LABEL: @vec_lxv_testi16a
+subroutine vec_lxv_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <8 x i16> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi16a
+
+! CHECK-LABEL: @vec_lxv_testi32a
+subroutine vec_lxv_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testi32a
+
+! CHECK-LABEL: @vec_lxv_testf32a
+subroutine vec_lxv_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <4 x float> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testf32a
+
+! CHECK-LABEL: @vec_lxv_testf64a
+subroutine vec_lxv_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_lxv(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR_P9: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR_P9: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR_P9: %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR_P9: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_lxv_testf64a
+
 !----------------------
 ! vec_xld2
 !----------------------
@@ -564,6 +912,330 @@ subroutine vec_xld2_testf64a(arg1, arg2, res)
 ! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
 end subroutine vec_xld2_testf64a
 
+!----------------------
+! vec_xl
+!----------------------
+
+! CHECK-LABEL: @vec_xl_testi8a
+subroutine vec_xl_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(4)
+  vector(integer(1)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: store <16 x i8> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi8a
+
+! CHECK-LABEL: @vec_xl_testi16a
+subroutine vec_xl_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: store <8 x i16> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi16a
+
+! CHECK-LABEL: @vec_xl_testi32a
+subroutine vec_xl_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: store <4 x i32> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testi32a
+
+! CHECK-LABEL: @vec_xl_testi64a
+subroutine vec_xl_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 8)
+  vector(integer(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<2xf64> to vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <2 x double> %[[ld]] to <2 x i64>
+! LLVMIR: store <2 x i64> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testi64a
+
+! CHECK-LABEL: @vec_xl_testf32a
+subroutine vec_xl_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvw4x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<4xi32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[ld]] : vector<4xi32> to vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %[[addr]])
+! LLVMIR: %[[bc:.*]] = bitcast <4 x i32> %[[ld]] to <4 x float>
+! LLVMIR: store <4 x float> %[[bc]], ptr %2, align 16
+end subroutine vec_xl_testf32a
+
+! CHECK-LABEL: @vec_xl_testf64a
+subroutine vec_xl_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2
+  vector(real(8)) :: res
+  res = vec_xl(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<f64>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.call @llvm.ppc.vsx.lxvd2x(%[[addr]]) fastmath<contract> : (!fir.ref<!fir.array<?xi8>>) -> vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[ld]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = call contract <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %[[addr]])
+! LLVMIR: store <2 x double> %[[ld]], ptr %2, align 16
+end subroutine vec_xl_testf64a
+
+!----------------------
+! vec_xlds
+!----------------------
+
+! CHECK-LABEL: @vec_xlds_testi64a
+subroutine vec_xlds_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(integer(8)) :: arg2(4)
+  vector(integer(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:i64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[vsplt]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shfl:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: store <2 x i64> %[[shfl]], ptr %2, align 16
+end subroutine vec_xlds_testi64a
+
+! CHECK-LABEL: @vec_xlds_testf64a
+subroutine vec_xlds_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  vector(real(8)) :: arg2(4)
+  vector(real(8)) :: res
+  res = vec_xlds(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[arg2:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4x!fir.vector<2:f64>>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg2]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[cnv:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<i64>
+! FIR: %[[ld:.*]] = fir.load %[[cnv]] : !fir.ref<i64>
+! FIR: %[[vsplt:.*]] = vector.splat %[[ld]] : vector<2xi64>
+! FIR: %[[bc:.*]] = vector.bitcast %[[vsplt]] : vector<2xi64> to vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[bc]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load i64, ptr %[[addr]], align 8
+! LLVMIR: %[[insrt:.*]] = insertelement <2 x i64> undef, i64 %[[ld]], i32 0
+! LLVMIR: %[[shfl:.*]] = shufflevector <2 x i64> %[[insrt]], <2 x i64> undef, <2 x i32> zeroinitializer
+! LLVMIR: %[[bc:.*]] = bitcast <2 x i64> %[[shfl]] to <2 x double>
+! LLVMIR: store <2 x double> %[[bc]], ptr %2, align 16
+end subroutine vec_xlds_testf64a
+
+!----------------------
+! vec_xl_be
+!----------------------
+
+! CHECK-LABEL: @vec_xl_be_testi8a
+subroutine vec_xl_be_testi8a(arg1, arg2, res)
+  integer(1) :: arg1
+  integer(1) :: arg2(2, 4, 8)
+  vector(integer(1)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i8>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi8>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i8) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<16xi8>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] : vector<16xi8>, vector<16xi8>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<16xi8>) -> !fir.vector<16:i8>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<16:i8>>
+
+! LLVMIR: %[[arg1:.*]] = load i8, ptr %0, align 1
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i8 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <16 x i8>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <16 x i8> %[[ld]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <16 x i8> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi8a
+
+! CHECK-LABEL: @vec_xl_be_testi16a
+subroutine vec_xl_be_testi16a(arg1, arg2, res)
+  integer(2) :: arg1
+  integer(2) :: arg2(2, 4, 8)
+  vector(integer(2)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<8xi16>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<8:i16>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR: %[[ld:.*]] = load <8 x i16>, ptr %[[addr]], align 1
+! LLVMIR: %[[shff:.*]] = shufflevector <8 x i16> %[[ld]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <8 x i16> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi16a
+
+! CHECK-LABEL: @vec_xl_be_testi32a
+subroutine vec_xl_be_testi32a(arg1, arg2, res)
+  integer(4) :: arg1
+  integer(4) :: arg2(2, 4, 8)
+  vector(integer(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i32>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load i32, ptr %0, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i32 %[[arg1]]
+! LLVMIR:  %[[ld:.*]] = load <4 x i32>, ptr %[[addr]], align 1
+! LLVMIR:  %[[shff:.*]] = shufflevector <4 x i32> %[[ld]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR:  store <4 x i32> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi32a
+
+! CHECK-LABEL: @vec_xl_be_testi64a
+subroutine vec_xl_be_testi64a(arg1, arg2, res)
+  integer(8) :: arg1
+  integer(8) :: arg2(2, 4, 8)
+  vector(integer(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<2x4x8xi64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<2xi64>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xi64>, vector<2xi64>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR:  %[[ld:.*]] = load <2 x i64>, ptr %[[addr]], align 1
+! LLVMIR:  %[[shff:.*]] = shufflevector <2 x i64> %[[ld]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR:  store <2 x i64> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testi64a
+
+! CHECK-LABEL: @vec_xl_be_testf32a
+subroutine vec_xl_be_testf32a(arg1, arg2, res)
+  integer(2) :: arg1
+  real(4) :: arg2(4)
+  vector(real(4)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i16>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<4xf32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load i16, ptr %0, align 2
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i16 %[[arg1]]
+! LLVMIR:  %[[ld:.*]] = load <4 x float>, ptr %[[addr]], align 1
+! LLVMIR:  %[[shff:.*]] = shufflevector <4 x float> %[[ld]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR:  store <4 x float> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testf32a
+
+! CHECK-LABEL: @vec_xl_be_testf64a
+subroutine vec_xl_be_testf64a(arg1, arg2, res)
+  integer(8) :: arg1
+  real(8) :: arg2(7)
+  vector(real(8)) :: res
+  res = vec_xl_be(arg1, arg2)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<i64>
+! FIR: %[[uarr:.*]] = fir.convert %arg1 : (!fir.ref<!fir.array<7xf64>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[uarr]], %[[arg1]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[ld:.*]] = fir.load %[[addr]] {alignment = 1 : i64} : !fir.ref<!fir.array<?xi8>>
+! FIR: %[[uv:.*]] = fir.undefined vector<2xf64>
+! FIR: %[[shff:.*]] = vector.shuffle %[[ld]], %[[uv]] [1, 0] : vector<2xf64>, vector<2xf64>
+! FIR: %[[res:.*]] = fir.convert %[[shff]] : (vector<2xf64>) -> !fir.vector<2:f64>
+! FIR: fir.store %[[res]] to %arg2 : !fir.ref<!fir.vector<2:f64>>
+
+! LLVMIR: %[[arg1:.*]] = load i64, ptr %0, align 8
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %1, i64 %[[arg1]]
+! LLVMIR:  %[[ld:.*]] = load <2 x double>, ptr %[[addr]], align 1
+! LLVMIR:  %[[shff:.*]] = shufflevector <2 x double> %[[ld]], <2 x double> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR:  store <2 x double> %[[shff]], ptr %2, align 16
+end subroutine vec_xl_be_testf64a
+
 !----------------------
 ! vec_xlw4
 !----------------------