[flang-commits] [flang] 45c0053 - [flang] Add a subset of PowerPC vector store intrinsics
Kelvin Li via flang-commits
flang-commits at lists.llvm.org
Thu Aug 10 15:11:42 PDT 2023
Author: Kelvin Li
Date: 2023-08-10T18:03:14-04:00
New Revision: 45c0053db26e128d4b250e641a33843e7316d9d3
URL: https://github.com/llvm/llvm-project/commit/45c0053db26e128d4b250e641a33843e7316d9d3
DIFF: https://github.com/llvm/llvm-project/commit/45c0053db26e128d4b250e641a33843e7316d9d3.diff
LOG: [flang] Add a subset of PowerPC vector store intrinsics
This patch adds vec_st, vec_ste, vec_stxv, vec_xst, vec_xst_be, vec_xstd2
and vec_xstw4.
Differential Revision: https://reviews.llvm.org/D156333
Added:
flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
flang/test/Lower/PowerPC/ppc-vec-store.f90
Modified:
flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
flang/module/__ppc_intrinsics.f90
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
index 981dd382f62bcb..31d0400d3108df 100644
--- a/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/PPCIntrinsicCall.h
@@ -45,8 +45,15 @@ enum class VecOp {
Sr,
Srl,
Sro,
+ St,
+ Ste,
+ Stxv,
Sub,
- Xor
+ Xor,
+ Xst,
+ Xst_be,
+ Xstd2,
+ Xstw4
};
/// Enums used to templatize and share lowering of PowerPC MMA intrinsics.
@@ -195,6 +202,12 @@ struct PPCIntrinsicLibrary : IntrinsicLibrary {
fir::ExtendedValue genVecSel(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args);
+
+ template <VecOp>
+ void genVecStore(llvm::ArrayRef<fir::ExtendedValue>);
+
+ template <VecOp>
+ void genVecXStore(llvm::ArrayRef<fir::ExtendedValue>);
};
const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name);
diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
index ec8488d03418fa..c1c6a2a36326fd 100644
--- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp
@@ -208,6 +208,21 @@ static constexpr IntrinsicHandler ppcHandlers[]{
&PI::genVecShift<VecOp::Sro>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
+ {"__ppc_vec_st",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecStore<VecOp::St>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
+ {"__ppc_vec_ste",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecStore<VecOp::Ste>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
+ {"__ppc_vec_stxv",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecXStore<VecOp::Stxv>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
{"__ppc_vec_sub",
static_cast<IntrinsicLibrary::ExtendedGenerator>(
&PI::genVecAddAndMulSubXor<VecOp::Sub>),
@@ -218,6 +233,26 @@ static constexpr IntrinsicHandler ppcHandlers[]{
&PI::genVecAddAndMulSubXor<VecOp::Xor>),
{{{"arg1", asValue}, {"arg2", asValue}}},
/*isElemental=*/true},
+ {"__ppc_vec_xst",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecXStore<VecOp::Xst>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
+ {"__ppc_vec_xst_be",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecXStore<VecOp::Xst_be>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
+ {"__ppc_vec_xstd2_",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecXStore<VecOp::Xstd2>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
+ {"__ppc_vec_xstw4_",
+ static_cast<IntrinsicLibrary::SubroutineGenerator>(
+ &PI::genVecXStore<VecOp::Xstw4>),
+ {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}},
+ /*isElemental=*/false},
};
static constexpr MathOperation ppcMathOperations[] = {
@@ -1680,4 +1715,188 @@ void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) {
}
}
+static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value baseAddr,
+ mlir::Value offset) {
+ auto typeExtent{fir::SequenceType::getUnknownExtent()};
+ // Construct an !fir.ref<!ref.array<?xi8>> type
+ auto arrRefTy{builder.getRefType(fir::SequenceType::get(
+ {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))};
+ // Convert arg to !fir.ref<!ref.array<?xi8>>
+ auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)};
+
+ return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset);
+}
+
+static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder,
+ mlir::Location loc, mlir::Value v,
+ int64_t len) {
+ assert(v.getType().isa<mlir::VectorType>());
+ assert(len > 0);
+ llvm::SmallVector<int64_t, 16> mask;
+ for (int64_t i = 0; i < len; ++i) {
+ mask.push_back(len - 1 - i);
+ }
+
+ auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())};
+ return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask);
+}
+
+// VEC_ST, VEC_STE
+template <VecOp vop>
+void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+
+ auto context{builder.getContext()};
+ auto argBases{getBasesForArgs(args)};
+ auto arg1TyInfo{getVecTypeFromFir(argBases[0])};
+
+ auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};
+
+ llvm::StringRef fname{};
+ mlir::VectorType stTy{nullptr};
+ auto i32ty{mlir::IntegerType::get(context, 32)};
+ switch (vop) {
+ case VecOp::St:
+ stTy = mlir::VectorType::get(4, i32ty);
+ fname = "llvm.ppc.altivec.stvx";
+ break;
+ case VecOp::Ste: {
+ const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()};
+ const auto len{arg1TyInfo.len};
+
+ if (arg1TyInfo.isFloat32()) {
+ stTy = mlir::VectorType::get(len, i32ty);
+ fname = "llvm.ppc.altivec.stvewx";
+ } else if (arg1TyInfo.eleTy.isa<mlir::IntegerType>()) {
+ stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width));
+
+ switch (width) {
+ case 8:
+ fname = "llvm.ppc.altivec.stvebx";
+ break;
+ case 16:
+ fname = "llvm.ppc.altivec.stvehx";
+ break;
+ case 32:
+ fname = "llvm.ppc.altivec.stvewx";
+ break;
+ default:
+ assert("invalid element size");
+ }
+ } else
+ assert("unknown type");
+ break;
+ }
+ default:
+ llvm_unreachable("invalid vector operation for generator");
+ }
+
+ auto funcType{
+ mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)};
+ mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, fname, funcType);
+
+ llvm::SmallVector<mlir::Value, 4> biArgs;
+
+ mlir::Value newArg1;
+ auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())};
+ auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context),
+ argBases[0])};
+
+ if (stTy != arg1TyInfo.toMlirVectorType(context))
+ newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv);
+ else
+ newArg1 = cnv;
+
+ if (isBEVecElemOrderOnLE())
+ newArg1 = builder.createConvert(
+ loc, stTy, reverseVectorElements(builder, loc, newArg1, 4));
+
+ biArgs.push_back(newArg1);
+ biArgs.push_back(addr);
+
+ builder.create<fir::CallOp>(loc, funcOp, biArgs);
+}
+
+static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder,
+ const int val) {
+ auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)};
+ auto alignAttr{mlir::IntegerAttr::get(i64ty, val)};
+ return builder.getNamedAttr("alignment", alignAttr);
+}
+
+// VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4
+template <VecOp vop>
+void PPCIntrinsicLibrary::genVecXStore(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ auto context{builder.getContext()};
+ auto argBases{getBasesForArgs(args)};
+ VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])};
+
+ auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])};
+
+ mlir::Value trg{nullptr};
+ mlir::Value src{nullptr};
+
+ switch (vop) {
+ case VecOp::Xst:
+ case VecOp::Xst_be: {
+ src = argBases[0];
+ trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
+ addr);
+
+ if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) {
+ auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
+ argBases[0])};
+ auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)};
+
+ src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf);
+ }
+ break;
+ }
+ case VecOp::Xstd2:
+ case VecOp::Xstw4: {
+    // a 16-byte vector arg1 is treated as two 8-byte elements or
+    // four 4-byte elements
+ mlir::IntegerType elemTy;
+ uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4;
+ elemTy = builder.getIntegerType(128 / numElem);
+
+ mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)};
+ fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)};
+
+ auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context),
+ argBases[0])};
+
+ mlir::Type srcTy{nullptr};
+ if (numElem != arg1TyInfo.len) {
+ cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv);
+ srcTy = firVecTy;
+ } else {
+ srcTy = arg1TyInfo.toFirVectorType();
+ }
+
+ trg = builder.createConvert(loc, builder.getRefType(srcTy), addr);
+
+ if (isBEVecElemOrderOnLE()) {
+ cnv = reverseVectorElements(builder, loc, cnv, numElem);
+ }
+
+ src = builder.createConvert(loc, srcTy, cnv);
+ break;
+ }
+ case VecOp::Stxv:
+ src = argBases[0];
+ trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()),
+ addr);
+ break;
+ default:
+ assert("Invalid vector operation for generator");
+ }
+ builder.create<fir::StoreOp>(loc, mlir::TypeRange{},
+ mlir::ValueRange{src, trg},
+ getAlignmentAttr(builder, 1));
+}
+
} // namespace fir
diff --git a/flang/module/__ppc_intrinsics.f90 b/flang/module/__ppc_intrinsics.f90
index 4f9c8c9432a622..936c50f92c83ce 100644
--- a/flang/module/__ppc_intrinsics.f90
+++ b/flang/module/__ppc_intrinsics.f90
@@ -323,6 +323,81 @@ elemental vector(real(VKIND)) function elem_func_vr##VKIND##r##VKIND##vr##VKIND#
#undef ELEM_FUNC_VIVIVIVU_2
#undef ELEM_FUNC_VRVRVRVR
+!! ================ 3 argument subroutine interfaces =================================
+! subroutine(vector(i), i, vector(i))
+#define SUB_VIIVI(VKIND) \
+ pure subroutine sub_vi##VKIND##ivi##VKIND(arg1, arg2, arg3); \
+ vector(integer(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ vector(integer(VKIND)), intent(in) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+! subroutine(vector(u), i, vector(u))
+#define SUB_VUIVU(VKIND) \
+ pure subroutine sub_vu##VKIND##ivu##VKIND(arg1, arg2, arg3); \
+ vector(unsigned(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ vector(unsigned(VKIND)), intent(in) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+! subroutine(vector(r), i, vector(r))
+#define SUB_VRIVR(VKIND) \
+ pure subroutine sub_vr##VKIND##ivr##VKIND(arg1, arg2, arg3); \
+ vector(real(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ vector(real(VKIND)), intent(in) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+! subroutine(vector(i), i, i)
+#define SUB_VIII(VKIND) \
+ pure subroutine sub_vi##VKIND##ii##VKIND(arg1, arg2, arg3); \
+ vector(integer(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ integer(VKIND), intent(out) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+! subroutine(vector(u), i, i)
+#define SUB_VUII(VKIND) \
+ pure subroutine sub_vu##VKIND##ii##VKIND(arg1, arg2, arg3); \
+ vector(unsigned(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ integer(VKIND), intent(out) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+! subroutine(vector(r), i, r)
+#define SUB_VRIR(VKIND) \
+ pure subroutine sub_vr##VKIND##ir##VKIND(arg1, arg2, arg3); \
+ vector(real(VKIND)), intent(in) :: arg1; \
+ integer(8), intent(in) :: arg2; \
+ !dir$ ignore_tkr(k) arg2; \
+ real(VKIND), intent(out) :: arg3; \
+ !dir$ ignore_tkr(r) arg3; \
+ end subroutine ;
+
+ SUB_VIIVI(1) SUB_VIIVI(2) SUB_VIIVI(4) SUB_VIIVI(8)
+ SUB_VUIVU(1) SUB_VUIVU(2) SUB_VUIVU(4) SUB_VUIVU(8)
+ SUB_VRIVR(4) SUB_VRIVR(8)
+ SUB_VIII(1) SUB_VIII(2) SUB_VIII(4) SUB_VIII(8)
+ SUB_VUII(1) SUB_VUII(2) SUB_VUII(4) SUB_VUII(8)
+ SUB_VRIR(4) SUB_VRIR(8)
+
+#undef SUB_VRIR
+#undef SUB_VUII
+#undef SUB_VIII
+#undef SUB_VRIVR
+#undef SUB_VUIVU
+#undef SUB_VIIVI
+
end interface
procedure(func_r4r4r4r4) :: __ppc_fmadd_r4
@@ -1102,4 +1177,155 @@ end function func_r8r8i
#undef VR_VU_I
#undef VR_VI_I
+!--------------------------------------------------
+! subroutine(vector, integer, vector/integer/real)
+!--------------------------------------------------
+! 'i0' stands for the integer argument being ignored via
+! the `ignore_tkr' directive.
+#define SUB_VI_I_VI(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0vi##VKIND
+#define SUB_VU_I_VU(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0vu##VKIND
+#define SUB_VR_I_VR(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0vr##VKIND
+#define SUB_VI_I_I(NAME, VKIND) __ppc_##NAME##_vi##VKIND##i0i##VKIND
+#define SUB_VU_I_I(NAME, VKIND) __ppc_##NAME##_vu##VKIND##i0u##VKIND
+#define SUB_VR_I_R(NAME, VKIND) __ppc_##NAME##_vr##VKIND##i0r##VKIND
+
+#define VEC_SUB_VI_I_VI(NAME, VKIND) \
+ procedure(sub_vi##VKIND##ivi##VKIND) :: SUB_VI_I_VI(NAME, VKIND);
+#define VEC_SUB_VU_I_VU(NAME, VKIND) \
+ procedure(sub_vu##VKIND##ivu##VKIND) :: SUB_VU_I_VU(NAME, VKIND);
+#define VEC_SUB_VR_I_VR(NAME, VKIND) \
+ procedure(sub_vr##VKIND##ivr##VKIND) :: SUB_VR_I_VR(NAME, VKIND);
+#define VEC_SUB_VI_I_I(NAME, VKIND) \
+ procedure(sub_vi##VKIND##ii##VKIND) :: SUB_VI_I_I(NAME, VKIND);
+#define VEC_SUB_VU_I_I(NAME, VKIND) \
+ procedure(sub_vu##VKIND##ii##VKIND) :: SUB_VU_I_I(NAME, VKIND);
+#define VEC_SUB_VR_I_R(NAME, VKIND) \
+ procedure(sub_vr##VKIND##ir##VKIND) :: SUB_VR_I_R(NAME, VKIND);
+
+! vec_st
+ VEC_SUB_VI_I_VI(vec_st,1) VEC_SUB_VI_I_VI(vec_st,2) VEC_SUB_VI_I_VI(vec_st,4)
+ VEC_SUB_VU_I_VU(vec_st,1) VEC_SUB_VU_I_VU(vec_st,2) VEC_SUB_VU_I_VU(vec_st,4)
+ VEC_SUB_VR_I_VR(vec_st,4)
+ VEC_SUB_VI_I_I(vec_st,1) VEC_SUB_VI_I_I(vec_st,2) VEC_SUB_VI_I_I(vec_st,4)
+ VEC_SUB_VU_I_I(vec_st,1) VEC_SUB_VU_I_I(vec_st,2) VEC_SUB_VU_I_I(vec_st,4)
+ VEC_SUB_VR_I_R(vec_st,4)
+ interface vec_st
+ procedure :: SUB_VI_I_VI(vec_st,1), SUB_VI_I_VI(vec_st,2), SUB_VI_I_VI(vec_st,4)
+ procedure :: SUB_VU_I_VU(vec_st,1), SUB_VU_I_VU(vec_st,2), SUB_VU_I_VU(vec_st,4)
+ procedure :: SUB_VR_I_VR(vec_st,4)
+ procedure :: SUB_VI_I_I(vec_st,1), SUB_VI_I_I(vec_st,2), SUB_VI_I_I(vec_st,4)
+ procedure :: SUB_VU_I_I(vec_st,1), SUB_VU_I_I(vec_st,2), SUB_VU_I_I(vec_st,4)
+ procedure :: SUB_VR_I_R(vec_st,4)
+ end interface vec_st
+ public :: vec_st
+
+! vec_ste
+ VEC_SUB_VI_I_I(vec_ste,1) VEC_SUB_VI_I_I(vec_ste,2) VEC_SUB_VI_I_I(vec_ste,4)
+ VEC_SUB_VU_I_I(vec_ste,1) VEC_SUB_VU_I_I(vec_ste,2) VEC_SUB_VU_I_I(vec_ste,4)
+ VEC_SUB_VR_I_R(vec_ste,4)
+ interface vec_ste
+ procedure :: SUB_VI_I_I(vec_ste,1), SUB_VI_I_I(vec_ste,2), SUB_VI_I_I(vec_ste,4)
+ procedure :: SUB_VU_I_I(vec_ste,1), SUB_VU_I_I(vec_ste,2), SUB_VU_I_I(vec_ste,4)
+ procedure :: SUB_VR_I_R(vec_ste,4)
+ end interface vec_ste
+ public :: vec_ste
+
+! vec_stxv
+ VEC_SUB_VI_I_VI(vec_stxv,1) VEC_SUB_VI_I_VI(vec_stxv,2) VEC_SUB_VI_I_VI(vec_stxv,4) VEC_SUB_VI_I_VI(vec_stxv,8)
+ VEC_SUB_VU_I_VU(vec_stxv,1) VEC_SUB_VU_I_VU(vec_stxv,2) VEC_SUB_VU_I_VU(vec_stxv,4) VEC_SUB_VU_I_VU(vec_stxv,8)
+ VEC_SUB_VR_I_VR(vec_stxv,4) VEC_SUB_VR_I_VR(vec_stxv,8)
+ VEC_SUB_VI_I_I(vec_stxv,1) VEC_SUB_VI_I_I(vec_stxv,2) VEC_SUB_VI_I_I(vec_stxv,4) VEC_SUB_VI_I_I(vec_stxv,8)
+ VEC_SUB_VU_I_I(vec_stxv,1) VEC_SUB_VU_I_I(vec_stxv,2) VEC_SUB_VU_I_I(vec_stxv,4) VEC_SUB_VU_I_I(vec_stxv,8)
+ VEC_SUB_VR_I_R(vec_stxv,4) VEC_SUB_VR_I_R(vec_stxv,8)
+ interface vec_stxv
+ procedure :: SUB_VI_I_VI(vec_stxv,1), SUB_VI_I_VI(vec_stxv,2), SUB_VI_I_VI(vec_stxv,4), SUB_VI_I_VI(vec_stxv,8)
+ procedure :: SUB_VU_I_VU(vec_stxv,1), SUB_VU_I_VU(vec_stxv,2), SUB_VU_I_VU(vec_stxv,4), SUB_VU_I_VU(vec_stxv,8)
+ procedure :: SUB_VR_I_VR(vec_stxv,4), SUB_VR_I_VR(vec_stxv,8)
+ procedure :: SUB_VI_I_I(vec_stxv,1), SUB_VI_I_I(vec_stxv,2), SUB_VI_I_I(vec_stxv,4), SUB_VI_I_I(vec_stxv,8)
+ procedure :: SUB_VU_I_I(vec_stxv,1), SUB_VU_I_I(vec_stxv,2), SUB_VU_I_I(vec_stxv,4), SUB_VU_I_I(vec_stxv,8)
+ procedure :: SUB_VR_I_R(vec_stxv,4), SUB_VR_I_R(vec_stxv,8)
+ end interface vec_stxv
+ public :: vec_stxv
+
+! vec_xst
+ VEC_SUB_VI_I_VI(vec_xst,1) VEC_SUB_VI_I_VI(vec_xst,2) VEC_SUB_VI_I_VI(vec_xst,4) VEC_SUB_VI_I_VI(vec_xst,8)
+ VEC_SUB_VU_I_VU(vec_xst,1) VEC_SUB_VU_I_VU(vec_xst,2) VEC_SUB_VU_I_VU(vec_xst,4) VEC_SUB_VU_I_VU(vec_xst,8)
+ VEC_SUB_VR_I_VR(vec_xst,4) VEC_SUB_VR_I_VR(vec_xst,8)
+ VEC_SUB_VI_I_I(vec_xst,1) VEC_SUB_VI_I_I(vec_xst,2) VEC_SUB_VI_I_I(vec_xst,4) VEC_SUB_VI_I_I(vec_xst,8)
+ VEC_SUB_VU_I_I(vec_xst,1) VEC_SUB_VU_I_I(vec_xst,2) VEC_SUB_VU_I_I(vec_xst,4) VEC_SUB_VU_I_I(vec_xst,8)
+ VEC_SUB_VR_I_R(vec_xst,4) VEC_SUB_VR_I_R(vec_xst,8)
+ interface vec_xst
+ procedure :: SUB_VI_I_VI(vec_xst,1), SUB_VI_I_VI(vec_xst,2), SUB_VI_I_VI(vec_xst,4), SUB_VI_I_VI(vec_xst,8)
+ procedure :: SUB_VU_I_VU(vec_xst,1), SUB_VU_I_VU(vec_xst,2), SUB_VU_I_VU(vec_xst,4), SUB_VU_I_VU(vec_xst,8)
+ procedure :: SUB_VR_I_VR(vec_xst,4), SUB_VR_I_VR(vec_xst,8)
+ procedure :: SUB_VI_I_I(vec_xst,1), SUB_VI_I_I(vec_xst,2), SUB_VI_I_I(vec_xst,4), SUB_VI_I_I(vec_xst,8)
+ procedure :: SUB_VU_I_I(vec_xst,1), SUB_VU_I_I(vec_xst,2), SUB_VU_I_I(vec_xst,4), SUB_VU_I_I(vec_xst,8)
+ procedure :: SUB_VR_I_R(vec_xst,4), SUB_VR_I_R(vec_xst,8)
+ end interface vec_xst
+ public :: vec_xst
+
+! vec_xst_be
+ VEC_SUB_VI_I_VI(vec_xst_be,1) VEC_SUB_VI_I_VI(vec_xst_be,2) VEC_SUB_VI_I_VI(vec_xst_be,4) VEC_SUB_VI_I_VI(vec_xst_be,8)
+ VEC_SUB_VU_I_VU(vec_xst_be,1) VEC_SUB_VU_I_VU(vec_xst_be,2) VEC_SUB_VU_I_VU(vec_xst_be,4) VEC_SUB_VU_I_VU(vec_xst_be,8)
+ VEC_SUB_VR_I_VR(vec_xst_be,4) VEC_SUB_VR_I_VR(vec_xst_be,8)
+ VEC_SUB_VI_I_I(vec_xst_be,1) VEC_SUB_VI_I_I(vec_xst_be,2) VEC_SUB_VI_I_I(vec_xst_be,4) VEC_SUB_VI_I_I(vec_xst_be,8)
+ VEC_SUB_VU_I_I(vec_xst_be,1) VEC_SUB_VU_I_I(vec_xst_be,2) VEC_SUB_VU_I_I(vec_xst_be,4) VEC_SUB_VU_I_I(vec_xst_be,8)
+ VEC_SUB_VR_I_R(vec_xst_be,4) VEC_SUB_VR_I_R(vec_xst_be,8)
+ interface vec_xst_be
+ procedure :: SUB_VI_I_VI(vec_xst_be,1), SUB_VI_I_VI(vec_xst_be,2), SUB_VI_I_VI(vec_xst_be,4), SUB_VI_I_VI(vec_xst_be,8)
+ procedure :: SUB_VU_I_VU(vec_xst_be,1), SUB_VU_I_VU(vec_xst_be,2), SUB_VU_I_VU(vec_xst_be,4), SUB_VU_I_VU(vec_xst_be,8)
+ procedure :: SUB_VR_I_VR(vec_xst_be,4), SUB_VR_I_VR(vec_xst_be,8)
+ procedure :: SUB_VI_I_I(vec_xst_be,1), SUB_VI_I_I(vec_xst_be,2), SUB_VI_I_I(vec_xst_be,4), SUB_VI_I_I(vec_xst_be,8)
+ procedure :: SUB_VU_I_I(vec_xst_be,1), SUB_VU_I_I(vec_xst_be,2), SUB_VU_I_I(vec_xst_be,4), SUB_VU_I_I(vec_xst_be,8)
+ procedure :: SUB_VR_I_R(vec_xst_be,4), SUB_VR_I_R(vec_xst_be,8)
+ end interface vec_xst_be
+ public :: vec_xst_be
+
+! vec_xstd2
+ VEC_SUB_VI_I_VI(vec_xstd2_,1) VEC_SUB_VI_I_VI(vec_xstd2_,2) VEC_SUB_VI_I_VI(vec_xstd2_,4) VEC_SUB_VI_I_VI(vec_xstd2_,8)
+ VEC_SUB_VU_I_VU(vec_xstd2_,1) VEC_SUB_VU_I_VU(vec_xstd2_,2) VEC_SUB_VU_I_VU(vec_xstd2_,4) VEC_SUB_VU_I_VU(vec_xstd2_,8)
+ VEC_SUB_VR_I_VR(vec_xstd2_,4) VEC_SUB_VR_I_VR(vec_xstd2_,8)
+ VEC_SUB_VI_I_I(vec_xstd2_,1) VEC_SUB_VI_I_I(vec_xstd2_,2) VEC_SUB_VI_I_I(vec_xstd2_,4) VEC_SUB_VI_I_I(vec_xstd2_,8)
+ VEC_SUB_VU_I_I(vec_xstd2_,1) VEC_SUB_VU_I_I(vec_xstd2_,2) VEC_SUB_VU_I_I(vec_xstd2_,4) VEC_SUB_VU_I_I(vec_xstd2_,8)
+ VEC_SUB_VR_I_R(vec_xstd2_,4) VEC_SUB_VR_I_R(vec_xstd2_,8)
+ interface vec_xstd2
+ procedure :: SUB_VI_I_VI(vec_xstd2_,1), SUB_VI_I_VI(vec_xstd2_,2), SUB_VI_I_VI(vec_xstd2_,4), SUB_VI_I_VI(vec_xstd2_,8)
+ procedure :: SUB_VU_I_VU(vec_xstd2_,1), SUB_VU_I_VU(vec_xstd2_,2), SUB_VU_I_VU(vec_xstd2_,4), SUB_VU_I_VU(vec_xstd2_,8)
+ procedure :: SUB_VR_I_VR(vec_xstd2_,4), SUB_VR_I_VR(vec_xstd2_,8)
+ procedure :: SUB_VI_I_I(vec_xstd2_,1), SUB_VI_I_I(vec_xstd2_,2), SUB_VI_I_I(vec_xstd2_,4), SUB_VI_I_I(vec_xstd2_,8)
+ procedure :: SUB_VU_I_I(vec_xstd2_,1), SUB_VU_I_I(vec_xstd2_,2), SUB_VU_I_I(vec_xstd2_,4), SUB_VU_I_I(vec_xstd2_,8)
+ procedure :: SUB_VR_I_R(vec_xstd2_,4), SUB_VR_I_R(vec_xstd2_,8)
+ end interface vec_xstd2
+ public :: vec_xstd2
+
+! vec_xstw4
+ VEC_SUB_VI_I_VI(vec_xstw4_,1) VEC_SUB_VI_I_VI(vec_xstw4_,2) VEC_SUB_VI_I_VI(vec_xstw4_,4)
+ VEC_SUB_VU_I_VU(vec_xstw4_,1) VEC_SUB_VU_I_VU(vec_xstw4_,2) VEC_SUB_VU_I_VU(vec_xstw4_,4)
+ VEC_SUB_VR_I_VR(vec_xstw4_,4)
+ VEC_SUB_VI_I_I(vec_xstw4_,1) VEC_SUB_VI_I_I(vec_xstw4_,2) VEC_SUB_VI_I_I(vec_xstw4_,4)
+ VEC_SUB_VU_I_I(vec_xstw4_,1) VEC_SUB_VU_I_I(vec_xstw4_,2) VEC_SUB_VU_I_I(vec_xstw4_,4)
+ VEC_SUB_VR_I_R(vec_xstw4_,4)
+ interface vec_xstw4
+ procedure :: SUB_VI_I_VI(vec_xstw4_,1), SUB_VI_I_VI(vec_xstw4_,2), SUB_VI_I_VI(vec_xstw4_,4)
+ procedure :: SUB_VU_I_VU(vec_xstw4_,1), SUB_VU_I_VU(vec_xstw4_,2), SUB_VU_I_VU(vec_xstw4_,4)
+ procedure :: SUB_VR_I_VR(vec_xstw4_,4)
+ procedure :: SUB_VI_I_I(vec_xstw4_,1), SUB_VI_I_I(vec_xstw4_,2), SUB_VI_I_I(vec_xstw4_,4)
+ procedure :: SUB_VU_I_I(vec_xstw4_,1), SUB_VU_I_I(vec_xstw4_,2), SUB_VU_I_I(vec_xstw4_,4)
+ procedure :: SUB_VR_I_R(vec_xstw4_,4)
+ end interface vec_xstw4
+ public :: vec_xstw4
+
+#undef VEC_SUB_VI_I_VI
+#undef VEC_SUB_VU_I_VU
+#undef VEC_SUB_VR_I_VR
+#undef VEC_SUB_VI_I_I
+#undef VEC_SUB_VU_I_I
+#undef VEC_SUB_VR_I_R
+#undef SUB_VI_I_VI
+#undef SUB_VU_I_VU
+#undef SUB_VR_I_VR
+#undef SUB_VI_I_I
+#undef SUB_VU_I_I
+#undef SUB_VR_I_R
+
end module __ppc_intrinsics
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90 b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
new file mode 100644
index 00000000000000..40f4c01e801594
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-store-elem-order.f90
@@ -0,0 +1,165 @@
+! RUN: bbc -emit-fir %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="FIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -fno-ppc-native-vector-element-order -o - | FileCheck --check-prefixes="LLVMIR" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_st
+!----------------------
+! CHECK-LABEL: vec_st_test
+subroutine vec_st_test(arg1, arg2, arg3)
+ vector(integer(2)) :: arg1
+ integer(4) :: arg2
+ vector(integer(2)) :: arg3
+ call vec_st(arg1, arg2, arg3)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! FIR: %[[bc:.*]] = vector.bitcast %[[varg1]] : vector<8xi16> to vector<4xi32>
+! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shf:.*]] = vector.shuffle %[[bc]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: fir.call @llvm.ppc.altivec.stvx(%[[shf]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! LLVMIR: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[bc:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[shf]], ptr %[[addr]])
+end subroutine vec_st_test
+
+!----------------------
+! vec_ste
+!----------------------
+! CHECK-LABEL: vec_ste_test
+subroutine vec_ste_test(arg1, arg2, arg3)
+ vector(real(4)) :: arg1
+ integer(4) :: arg2
+ real(4) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[bc:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<4xi32>
+! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shf:.*]] = vector.shuffle %[[bc]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: fir.call @llvm.ppc.altivec.stvewx(%[[shf]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[bc:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! LLVMIR: %[[shf:.*]] = shufflevector <4 x i32> %[[bc]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[shf]], ptr %[[addr]])
+end subroutine vec_ste_test
+
+!----------------------
+! vec_xst
+!----------------------
+! CHECK-LABEL: vec_xst_test
+subroutine vec_xst_test(arg1, arg2, arg3)
+ vector(integer(4)) :: arg1
+ integer(4) :: arg2
+ vector(integer(4)) :: arg3
+ call vec_xst(arg1, arg2, arg3)
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! FIR: %[[ordr:.*]] = fir.undefined vector<4xi32>
+! FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[ordr]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! LLVMIR: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i32, ptr %1, align 4
+! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! LLVMIR: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x i32> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xst_test
+
+!----------------------
+! vec_xstd2
+!----------------------
+! CHECK-LABEL: vec_xstd2_test
+subroutine vec_xstd2_test(arg1, arg2, arg3, i)
+ vector(real(4)) :: arg1
+ integer(2) :: arg2
+ vector(real(4)) :: arg3(*)
+ integer(4) :: i
+ call vec_xstd2(arg1, arg2, arg3(i))
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i16>
+! FIR: %[[arg4:.*]] = fir.load %arg3 : !fir.ref<i32>
+! FIR: %[[arg4_64:.*]] = fir.convert %[[arg4]] : (i32) -> i64
+! FIR: %[[one:.*]] = arith.constant 1 : i64
+! FIR: %[[idx:.*]] = arith.subi %[[arg4_64]], %[[one]] : i64
+! FIR: %[[elemaddr:.*]] = fir.coordinate_of %arg2, %[[idx]] : (!fir.ref<!fir.array<?x!fir.vector<4:f32>>>, i64) -> !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[elemptr:.*]] = fir.convert %[[elemaddr]] : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[v2elem:.*]] = vector.bitcast %[[varg1]] : vector<4xf32> to vector<2xi64>
+! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
+! FIR: %[[undef:.*]] = fir.undefined vector<2xi64>
+! FIR: %[[shf:.*]] = vector.shuffle %[[v2elem]], %[[undef]] [1, 0] : vector<2xi64>, vector<2xi64>
+! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %1, align 2
+! LLVMIR: %[[arg4:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[arg4_64:.*]] = sext i32 %[[arg4]] to i64
+! LLVMIR: %[[idx:.*]] = sub i64 %[[arg4_64]], 1
+! LLVMIR: %[[elemptr:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[idx]]
+! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %[[elemptr]], i16 %[[arg2]]
+! LLVMIR: %[[v2elem:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
+! LLVMIR: %[[src:.*]] = shufflevector <2 x i64> %[[v2elem]], <2 x i64> undef, <2 x i32> <i32 1, i32 0>
+! LLVMIR: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xstd2_test
+
+!----------------------
+! vec_xstw4
+!----------------------
+! CHECK-LABEL: vec_xstw4_test
+subroutine vec_xstw4_test(arg1, arg2, arg3, i)
+ vector(real(4)) :: arg1
+ integer(2) :: arg2
+ vector(real(4)) :: arg3(*)
+ integer(4) :: i
+ call vec_xstw4(arg1, arg2, arg3(i))
+
+! FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i16>
+! FIR: %[[arg4:.*]] = fir.load %arg3 : !fir.ref<i32>
+! FIR: %[[arg4_64:.*]] = fir.convert %[[arg4]] : (i32) -> i64
+! FIR: %[[one:.*]] = arith.constant 1 : i64
+! FIR: %[[idx:.*]] = arith.subi %[[arg4_64]], %[[one]] : i64
+! FIR: %[[elemaddr:.*]] = fir.coordinate_of %arg2, %[[idx]] : (!fir.ref<!fir.array<?x!fir.vector<4:f32>>>, i64) -> !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[elemptr:.*]] = fir.convert %[[elemaddr]] : (!fir.ref<!fir.vector<4:f32>>) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
+! FIR: %[[undef:.*]] = fir.undefined vector<4xf32>
+! FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
+
+! LLVMIR: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
+! LLVMIR: %[[arg2:.*]] = load i16, ptr %1, align 2
+! LLVMIR: %[[arg4:.*]] = load i32, ptr %3, align 4
+! LLVMIR: %[[arg4_64:.*]] = sext i32 %[[arg4]] to i64
+! LLVMIR: %[[idx:.*]] = sub i64 %[[arg4_64]], 1
+! LLVMIR: %[[elemptr:.*]] = getelementptr <4 x float>, ptr %2, i64 %[[idx]]
+! LLVMIR: %[[trg:.*]] = getelementptr i8, ptr %[[elemptr]], i16 %[[arg2]]
+! LLVMIR: %[[src:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! LLVMIR: store <4 x float> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xstw4_test
diff --git a/flang/test/Lower/PowerPC/ppc-vec-store.f90 b/flang/test/Lower/PowerPC/ppc-vec-store.f90
new file mode 100644
index 00000000000000..ddbb7fb2a879b7
--- /dev/null
+++ b/flang/test/Lower/PowerPC/ppc-vec-store.f90
@@ -0,0 +1,1207 @@
+! RUN: bbc -emit-fir %s -o - | FileCheck --check-prefixes="CHECK-FIR" %s
+! RUN: %flang_fc1 -emit-fir %s -o - | fir-opt --fir-to-llvm-ir | FileCheck --check-prefixes="CHECK-LLVMIR" %s
+! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck --check-prefixes="CHECK" %s
+! REQUIRES: target=powerpc{{.*}}
+
+!----------------------
+! vec_st
+!----------------------
+
+! CHECK-LABEL: vec_st_vi1i2vi1
+subroutine vec_st_vi1i2vi1(arg1, arg2, arg3)
+ vector(integer(1)) :: arg1, arg3
+ integer(2) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:i8>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
+! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<16xi8>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+end subroutine vec_st_vi1i2vi1
+
+! CHECK-LABEL: vec_st_vi2i2vi2
+subroutine vec_st_vi2i2vi2(arg1, arg2, arg3)
+ vector(integer(2)) :: arg1, arg3
+ integer(2) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+end subroutine vec_st_vi2i2vi2
+
+! CHECK-LABEL: vec_st_vi4i2vi4
+subroutine vec_st_vi4i2vi4(arg1, arg2, arg3)
+ vector(integer(4)) :: arg1, arg3
+ integer(2) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_st_vi4i2vi4
+
+! CHECK-LABEL: vec_st_vu1i4vu1
+subroutine vec_st_vu1i4vu1(arg1, arg2, arg3)
+ vector(unsigned(1)) :: arg1, arg3
+ integer(4) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<16:ui8>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<16xi8> to vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<16xi8>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<16xi8> to vector<4xi32>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: %[[bcArg1:.*]] = bitcast <16 x i8> %[[arg1]] to <4 x i32>
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+end subroutine vec_st_vu1i4vu1
+
+! CHECK-LABEL: vec_st_vu2i4vu2
+subroutine vec_st_vu2i4vu2(arg1, arg2, arg3)
+ vector(unsigned(2)) :: arg1, arg3
+ integer(4) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<8:ui16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: %[[bcArg1:.*]] = vector.bitcast %[[cnvArg1]] : vector<8xi16> to vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[bcArg1:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[bcArg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: %[[bcArg1:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[bcArg1]], ptr %[[arg3]])
+end subroutine vec_st_vu2i4vu2
+
+! CHECK-LABEL: vec_st_vu4i4vu4
+subroutine vec_st_vu4i4vu4(arg1, arg2, arg3)
+ vector(unsigned(4)) :: arg1, arg3
+ integer(4) :: arg2
+ call vec_st(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<!fir.vector<4:ui32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_st_vu4i4vu4
+
+! CHECK-LABEL: vec_st_vi4i4via4
+subroutine vec_st_vi4i4via4(arg1, arg2, arg3, i)
+ vector(integer(4)) :: arg1, arg3(5)
+ integer(4) :: arg2, i
+ call vec_st(arg1, arg2, arg3(i))
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref<!fir.array<5x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvx(%[[varg1]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<5 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %5 = load <4 x i32>, ptr %0, align 16
+! CHECK: %6 = load i32, ptr %1, align 4
+! CHECK: %7 = load i32, ptr %3, align 4
+! CHECK: %8 = sext i32 %7 to i64
+! CHECK: %9 = sub i64 %8, 1
+! CHECK: %10 = getelementptr [5 x <4 x i32>], ptr %2, i32 0, i64 %9
+! CHECK: %11 = getelementptr i8, ptr %10, i32 %6
+! CHECK: call void @llvm.ppc.altivec.stvx(<4 x i32> %5, ptr %11)
+end subroutine vec_st_vi4i4via4
+
+!----------------------
+! vec_ste
+!----------------------
+
+! CHECK-LABEL: vec_ste_vi1i2i1
+subroutine vec_ste_vi1i2i1(arg1, arg2, arg3)
+ vector(integer(1)) :: arg1
+ integer(2) :: arg2
+ integer(1) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:i8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:i8>) -> vector<16xi8>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<16xi8>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vi1i2i1
+
+! CHECK-LABEL: vec_ste_vi2i2i2
+subroutine vec_ste_vi2i2i2(arg1, arg2, arg3)
+ vector(integer(2)) :: arg1
+ integer(2) :: arg2
+ integer(2) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i16>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<8xi16>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<i16> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vi2i2i2
+
+! CHECK-LABEL: vec_ste_vi4i2i4
+subroutine vec_ste_vi4i2i4(arg1, arg2, arg3)
+ vector(integer(4)) :: arg1
+ integer(2) :: arg2
+ integer(4) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i16 %5
+! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vi4i2i4
+
+! CHECK-LABEL: vec_ste_vu1i4u1
+subroutine vec_ste_vu1i4u1(arg1, arg2, arg3)
+ vector(unsigned(1)) :: arg1
+ integer(4) :: arg2
+ integer(1) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<16:ui8>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i8>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<16:ui8>) -> vector<16xi8>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvebx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<16xi8>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<16xi8>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3:.*]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvebx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<16xi8>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <16 x i8>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: call void @llvm.ppc.altivec.stvebx(<16 x i8> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vu1i4u1
+
+! CHECK-LABEL: vec_ste_vu2i4u2
+subroutine vec_ste_vu2i4u2(arg1, arg2, arg3)
+ vector(unsigned(2)) :: arg1
+ integer(4) :: arg2
+ integer(2) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<8:ui16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i16>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:ui16>) -> vector<8xi16>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvehx(%[[cnvArg1]], %[[addr]]) fastmath<contract> : (vector<8xi16>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i16> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvehx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<8xi16>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: call void @llvm.ppc.altivec.stvehx(<8 x i16> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vu2i4u2
+
+! CHECK-LABEL: vec_ste_vu4i4u4
+subroutine vec_ste_vu4i4u4(arg1, arg2, arg3)
+ vector(unsigned(4)) :: arg1
+ integer(4) :: arg2
+ integer(4) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:ui32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %{{.*}} : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:ui32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[addr]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[addr]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %{{.*}}, align 4
+! CHECK: %[[arg3:.*]] = getelementptr i8, ptr %{{.*}}, i32 %5
+! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[arg3]])
+end subroutine vec_ste_vu4i4u4
+
+! CHECK-LABEL: vec_ste_vr4i4r4
+subroutine vec_ste_vr4i4r4(arg1, arg2, arg3)
+ vector(real(4)) :: arg1
+ integer(4) :: arg2
+ real(4) :: arg3
+ call vec_ste(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[cnvArg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[bc:.*]] = vector.bitcast %[[cnvArg1]] : vector<4xf32> to vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<4xi32>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[bc]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[pos:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: %[[bc:.*]] = bitcast <4 x float> %[[arg1]] to <4 x i32>
+! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[bc]], ptr %[[pos]])
+
+end subroutine vec_ste_vr4i4r4
+
+! CHECK-LABEL: vec_ste_vi4i4ia4
+subroutine vec_ste_vi4i4ia4(arg1, arg2, arg3, i)
+ vector(integer(4)) :: arg1
+ integer(4) :: arg2, i
+ integer(4) :: arg3(5)
+ call vec_ste(arg1, arg2, arg3(i))
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[cnst:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[cnstm1:.*]] = arith.subi %[[idx64]], %[[cnst]] : i64
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %arg2, %[[cnstm1]] : (!fir.ref<!fir.array<5xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[ref:.*]] = fir.convert %[[addr]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[pos:.*]] = fir.coordinate_of %[[ref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: fir.call @llvm.ppc.altivec.stvewx(%[[varg1]], %[[pos]]) fastmath<contract> : (vector<4xi32>, !fir.ref<!fir.array<?xi8>>) -> ()
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<5 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[bc:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[pos:.*]] = llvm.getelementptr %[[bc]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: llvm.call @llvm.ppc.altivec.stvewx(%[[arg1]], %[[pos]]) {fastmathFlags = #llvm.fastmath<contract>} : (vector<4xi32>, !llvm.ptr<i8>) -> ()
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[addr:.*]] = getelementptr [5 x i32], ptr %[[arg3:.*]], i32 0, i64 %[[idx64m1]]
+! CHECK: %[[pos:.*]] = getelementptr i8, ptr %[[addr]], i32 %[[arg2]]
+! CHECK: call void @llvm.ppc.altivec.stvewx(<4 x i32> %[[arg1]], ptr %[[pos]])
+end subroutine vec_ste_vi4i4ia4
+
+!----------------------
+! vec_stxv
+!----------------------
+
+! CHECK-LABEL: vec_stxv_test_vr4i2r4
+subroutine vec_stxv_test_vr4i2r4(arg1, arg2, arg3)
+ vector(real(4)) :: arg1
+ integer(2) :: arg2
+ real(4) :: arg3
+ call vec_stxv(arg1, arg2, arg3)
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+end subroutine vec_stxv_test_vr4i2r4
+
+! CHECK-LABEL: vec_stxv_test_vi4i8ia4
+subroutine vec_stxv_test_vi4i8ia4(arg1, arg2, arg3, i)
+ vector(integer(4)) :: arg1
+ integer(8) :: arg2
+ integer(4) :: arg3(10)
+ integer(4) :: i
+ call vec_stxv(arg1, arg2, arg3(i))
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_stxv_test_vi4i8ia4
+
+! CHECK-LABEL: vec_stxv_test_vi2i4vi2
+subroutine vec_stxv_test_vi2i4vi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1
+  integer(4) :: arg2
+  vector(integer(2)) :: arg3
+  call vec_stxv(arg1, arg2, arg3)
+! vec_stxv with a vector target: expects the target to be viewed as bytes,
+! offset by arg2, then stored to with alignment 1 (unaligned vector store).
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: store <8 x i16> %[[arg1]], ptr %[[addr]], align 1
+end subroutine vec_stxv_test_vi2i4vi2
+
+! CHECK-LABEL: vec_stxv_test_vi4i4vai4
+subroutine vec_stxv_test_vi4i4vai4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(4) :: arg2
+  vector(integer(4)) :: arg3(20)
+  integer(4) :: i
+  call vec_stxv(arg1, arg2, arg3(i))
+! vec_stxv into a vector-array element: the 1-based index i is converted to a
+! 0-based i64 element offset, then arg2 is applied as a byte offset before the
+! unaligned (align 1) store.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_stxv_test_vi4i4vai4
+
+!----------------------
+! vec_xst
+!----------------------
+
+! CHECK-LABEL: vec_xst_test_vr4i2r4
+subroutine vec_xst_test_vr4i2r4(arg1, arg2, arg3)
+  vector(real(4)) :: arg1
+  integer(2) :: arg2
+  real(4) :: arg3
+  call vec_xst(arg1, arg2, arg3)
+! vec_xst with a scalar real target: arg3 is reinterpreted as bytes, offset by
+! arg2, and the vector is stored unaligned (align 1); no element reordering.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+end subroutine vec_xst_test_vr4i2r4
+
+! CHECK-LABEL: vec_xst_test_vi4i8ia4
+subroutine vec_xst_test_vi4i8ia4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(8) :: arg2
+  integer(4) :: arg3(10)
+  integer(4) :: i
+  call vec_xst(arg1, arg2, arg3(i))
+! vec_xst into an integer-array element with a 64-bit byte offset: index i is
+! rebased to 0, the element address is offset by arg2 bytes, then an unaligned
+! (align 1) vector store is emitted.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_xst_test_vi4i8ia4
+
+! CHECK-LABEL: vec_xst_test_vi2i4vi2
+subroutine vec_xst_test_vi2i4vi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1
+  integer(4) :: arg2
+  vector(integer(2)) :: arg3
+  call vec_xst(arg1, arg2, arg3)
+! vec_xst with a vector target: byte-offset GEP from arg3 by arg2, followed by
+! an unaligned (align 1) store of the <8 x i16> value.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: store <8 x i16> %[[arg1]], ptr %[[addr]], align 1
+end subroutine vec_xst_test_vi2i4vi2
+
+! CHECK-LABEL: vec_xst_test_vi4i4vai4
+subroutine vec_xst_test_vi4i4vai4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(4) :: arg2
+  vector(integer(4)) :: arg3(20)
+  integer(4) :: i
+  call vec_xst(arg1, arg2, arg3(i))
+! vec_xst into a vector-array element: 1-based index i becomes a 0-based i64
+! element offset, arg2 is added as a byte offset, and the store is unaligned
+! (align 1).
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: fir.store %[[arg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_xst_test_vi4i4vai4
+
+!----------------------
+! vec_xst_be
+!----------------------
+
+! CHECK-LABEL: vec_xst_be_test_vr4i2r4
+subroutine vec_xst_be_test_vr4i2r4(arg1, arg2, arg3)
+  vector(real(4)) :: arg1
+  integer(2) :: arg2
+  real(4) :: arg3
+  call vec_xst_be(arg1, arg2, arg3)
+! vec_xst_be differs from vec_xst by reversing the element order (shuffle
+! [3, 2, 1, 0]) before the unaligned store; the address computation is the same
+! byte-offset GEP.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xf32>
+! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>, vector<4xf32>
+! CHECK-FIR: %[[fvarg1:.*]] = fir.convert %[[shf]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: fir.store %[[fvarg1]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xf32>
+! CHECK-LLVMIR: %[[shf:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xf32>
+! CHECK-LLVMIR: llvm.store %[[shf]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! CHECK: %[[shf:.*]] = shufflevector <4 x float> %[[arg1]], <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! CHECK: store <4 x float> %[[shf]], ptr %[[addr]], align 1
+end subroutine vec_xst_be_test_vr4i2r4
+
+! CHECK-LABEL: vec_xst_be_test_vi4i8ia4
+subroutine vec_xst_be_test_vi4i8ia4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(8) :: arg2
+  integer(4) :: arg3(10)
+  integer(4) :: i
+  call vec_xst_be(arg1, arg2, arg3(i))
+! vec_xst_be into an integer-array element: same index/byte-offset address math
+! as vec_xst, plus an element-reversing shuffle [3, 2, 1, 0] of the source
+! before the unaligned store.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32>
+! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
+! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xst_be_test_vi4i8ia4
+
+! CHECK-LABEL: vec_xst_be_test_vi2i4vi2
+subroutine vec_xst_be_test_vi2i4vi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1
+  integer(4) :: arg2
+  vector(integer(2)) :: arg3
+  call vec_xst_be(arg1, arg2, arg3)
+! vec_xst_be on an 8-element vector: the reversal shuffle widens to
+! [7, 6, 5, 4, 3, 2, 1, 0]; the byte-offset address math and the unaligned
+! (align 1) store match the vec_xst case.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<8xi16>
+! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>, vector<8xi16>
+! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<8xi16>) -> !fir.vector<8:i16>
+! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<8:i16>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<8xi16>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [7, 6, 5, 4, 3, 2, 1, 0] : vector<8xi16>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: %[[src:.*]] = shufflevector <8 x i16> %[[arg1]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+! CHECK: store <8 x i16> %[[src]], ptr %[[addr]], align 1
+end subroutine vec_xst_be_test_vi2i4vi2
+
+! CHECK-LABEL: vec_xst_be_test_vi4i4vai4
+subroutine vec_xst_be_test_vi4i4vai4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(4) :: arg2
+  vector(integer(4)) :: arg3(20)
+  integer(4) :: i
+  call vec_xst_be(arg1, arg2, arg3(i))
+! vec_xst_be into a vector-array element: 0-based element indexing plus arg2
+! byte offset, with an element-reversing shuffle [3, 2, 1, 0] before the
+! unaligned store.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[varg1:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[undef:.*]] = fir.undefined vector<4xi32>
+! CHECK-FIR: %[[shf:.*]] = vector.shuffle %[[varg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>, vector<4xi32>
+! CHECK-FIR: %[[src:.*]] = fir.convert %[[shf]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! CHECK-FIR: fir.store %[[src]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[undef:.*]] = llvm.mlir.undef : vector<4xi32>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.shufflevector %[[arg1]], %[[undef]] [3, 2, 1, 0] : vector<4xi32>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
+! CHECK: %[[src:.*]] = shufflevector <4 x i32> %[[arg1]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+! CHECK: store <4 x i32> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xst_be_test_vi4i4vai4
+
+!----------------------
+! vec_xstd2
+!----------------------
+
+! CHECK-LABEL: vec_xstd2_test_vr4i2r4
+subroutine vec_xstd2_test_vr4i2r4(arg1, arg2, arg3)
+  vector(real(4)) :: arg1
+  integer(2) :: arg2
+  real(4) :: arg3
+  call vec_xstd2(arg1, arg2, arg3)
+! vec_xstd2 reinterprets the source as two doublewords: the <4 x float> value
+! is bitcast to <2 x i64> before the unaligned (align 1) store at byte offset
+! arg2.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xf32> to vector<2xi64>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xf32> to vector<2xi64>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! CHECK: %[[src:.*]] = bitcast <4 x float> %[[arg1]] to <2 x i64>
+! CHECK: store <2 x i64> %[[src]], ptr %[[addr]], align 1
+end subroutine vec_xstd2_test_vr4i2r4
+
+! CHECK-LABEL: vec_xstd2_test_vi4i8ia4
+subroutine vec_xstd2_test_vi4i8ia4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(8) :: arg2
+  integer(4) :: arg3(10)
+  integer(4) :: i
+  call vec_xstd2(arg1, arg2, arg3(i))
+! vec_xstd2 into an integer-array element: the usual index/byte-offset address
+! math, with the <4 x i32> source bitcast to <2 x i64> before the unaligned
+! store.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[elemref:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemref]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[elemref:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemref]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %6
+! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
+! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xstd2_test_vi4i8ia4
+
+! CHECK-LABEL: vec_xstd2_test_vi2i4vi2
+subroutine vec_xstd2_test_vi2i4vi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1
+  integer(4) :: arg2
+  vector(integer(2)) :: arg3
+  call vec_xstd2(arg1, arg2, arg3)
+! vec_xstd2 on a halfword vector: the <8 x i16> source is bitcast to <2 x i64>
+! and stored unaligned (align 1) at byte offset arg2 from arg3.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<2xi64>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<2xi64>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <2 x i64>
+! CHECK: store <2 x i64> %[[src]], ptr %[[addr]], align 1
+end subroutine vec_xstd2_test_vi2i4vi2
+
+! CHECK-LABEL: vec_xstd2_test_vi4i4vai4
+subroutine vec_xstd2_test_vi4i4vai4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(4) :: arg2
+  vector(integer(4)) :: arg3(20)
+  integer(4) :: i
+  call vec_xstd2(arg1, arg2, arg3(i))
+! vec_xstd2 into a vector-array element: 0-based element indexing plus arg2
+! byte offset, with the source bitcast to <2 x i64> before the unaligned
+! (align 1) store.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<4xi32> to vector<2xi64>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<2:i64>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<2xi64>) -> !fir.vector<2:i64>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<2:i64>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<4xi32> to vector<2xi64>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<2xi64>>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
+! CHECK: %[[src:.*]] = bitcast <4 x i32> %[[arg1]] to <2 x i64>
+! CHECK: store <2 x i64> %[[src]], ptr %[[trg]], align 1
+end subroutine vec_xstd2_test_vi4i4vai4
+
+!----------------------
+! vec_xstw4
+!----------------------
+
+! CHECK-LABEL: vec_xstw4_test_vr4i2r4
+subroutine vec_xstw4_test_vr4i2r4(arg1, arg2, arg3)
+  vector(real(4)) :: arg1
+  integer(2) :: arg2
+  real(4) :: arg3
+  call vec_xstw4(arg1, arg2, arg3)
+! The target address is arg3's address offset by arg2 bytes; the <4 x float>
+! source is stored directly with align 1 — no bitcast is needed since the
+! elements are already 32-bit words.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %{{.*}} : !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %{{.*}} : !fir.ref<i16>
+! CHECK-FIR: %[[arg3ptr:.*]] = fir.convert %arg2 : (!fir.ref<f32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3ptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i16) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:f32>) -> vector<4xf32>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:f32>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xf32>) -> !fir.vector<4:f32>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:f32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %{{.*}} : !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %{{.*}} : !llvm.ptr<i16>
+! CHECK-LLVMIR: %[[arg3ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<f32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3ptr]][%[[arg2]]] : (!llvm.ptr<i8>, i16) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xf32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x float>, ptr %{{.*}}, align 16
+! CHECK: %[[arg2:.*]] = load i16, ptr %{{.*}}, align 2
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %{{.*}}, i16 %[[arg2]]
+! CHECK: store <4 x float> %[[arg1]], ptr %[[addr]], align 1
+end subroutine vec_xstw4_test_vr4i2r4
+
+! CHECK-LABEL: vec_xstw4_test_vi4i8ia4
+subroutine vec_xstw4_test_vi4i8ia4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(8) :: arg2
+  integer(4) :: arg3(10)
+  integer(4) :: i
+  call vec_xstw4(arg1, arg2, arg3(i))
+! The destination is an element of an integer(4) array: the element address
+! is computed (1-based index sign-extended to i64 and made 0-based), offset
+! by arg2 bytes, and the <4 x i32> source is stored with align 1 — no bitcast
+! is needed since the elements are already 32-bit words.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i64>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %arg2, %[[idx64m1]] : (!fir.ref<!fir.array<10xi32>>, i64) -> !fir.ref<i32>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<i32>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i64) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]]
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i64>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %arg2[0, %[[idx64m1]]] : (!llvm.ptr<array<10 x i32>>, i64) -> !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<i32> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i64) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i64, ptr %1, align 8
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [10 x i32], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i64 %[[arg2]]
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_xstw4_test_vi4i8ia4
+
+! CHECK-LABEL: vec_xstw4_test_vi2i4vi2
+subroutine vec_xstw4_test_vi2i4vi2(arg1, arg2, arg3)
+  vector(integer(2)) :: arg1
+  integer(4) :: arg2
+  vector(integer(2)) :: arg3
+  call vec_xstw4(arg1, arg2, arg3)
+! The <8 x i16> source is bitcast to <4 x i32> (vec_xstw4 stores as four
+! words) before the unaligned (align 1) store at arg3's address plus arg2
+! bytes.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<8:i16>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[arg3:.*]] = fir.convert %arg2 : (!fir.ref<!fir.vector<8:i16>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[arg3]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<8:i16>) -> vector<8xi16>
+! CHECK-FIR: %[[bcsrc:.*]] = vector.bitcast %[[vsrc]] : vector<8xi16> to vector<4xi32>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[bcsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]] {alignment = 1 : i64} : !fir.ref<!fir.vector<4:i32>>
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<8xi16>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[arg3:.*]] = llvm.bitcast %arg2 : !llvm.ptr<vector<8xi16>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[arg3]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[src:.*]] = llvm.bitcast %[[arg1]] : vector<8xi16> to vector<4xi32>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[src]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <8 x i16>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[addr:.*]] = getelementptr i8, ptr %2, i32 %[[arg2]]
+! CHECK: %[[src:.*]] = bitcast <8 x i16> %[[arg1]] to <4 x i32>
+! CHECK: store <4 x i32> %[[src]], ptr %[[addr]], align 1
+end subroutine vec_xstw4_test_vi2i4vi2
+
+! CHECK-LABEL: vec_xstw4_test_vi4i4vai4
+subroutine vec_xstw4_test_vi4i4vai4(arg1, arg2, arg3, i)
+  vector(integer(4)) :: arg1
+  integer(4) :: arg2
+  vector(integer(4)) :: arg3(20)
+  integer(4) :: i
+  call vec_xstw4(arg1, arg2, arg3(i))
+! Same as the scalar-target case but the destination is the array element
+! arg3(i): the element address is computed (index sign-extended to i64 and
+! made 0-based), offset by arg2 bytes, and the <4 x i32> source is stored
+! with align 1 and no bitcast.
+
+! CHECK-FIR: %[[arg1:.*]] = fir.load %arg0 : !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[arg2:.*]] = fir.load %arg1 : !fir.ref<i32>
+! CHECK-FIR: %[[idx:.*]] = fir.load %arg3 : !fir.ref<i32>
+! CHECK-FIR: %[[idx64:.*]] = fir.convert %[[idx]] : (i32) -> i64
+! CHECK-FIR: %[[one:.*]] = arith.constant 1 : i64
+! CHECK-FIR: %[[idx64m1:.*]] = arith.subi %[[idx64]], %[[one]] : i64
+! CHECK-FIR: %[[elem:.*]] = fir.coordinate_of %[[arg3:.*]], %[[idx64m1]] : (!fir.ref<!fir.array<20x!fir.vector<4:i32>>>, i64) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[elemptr:.*]] = fir.convert %[[elem]] : (!fir.ref<!fir.vector<4:i32>>) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[addr:.*]] = fir.coordinate_of %[[elemptr]], %[[arg2]] : (!fir.ref<!fir.array<?xi8>>, i32) -> !fir.ref<!fir.array<?xi8>>
+! CHECK-FIR: %[[vsrc:.*]] = fir.convert %[[arg1]] : (!fir.vector<4:i32>) -> vector<4xi32>
+! CHECK-FIR: %[[trg:.*]] = fir.convert %[[addr]] : (!fir.ref<!fir.array<?xi8>>) -> !fir.ref<!fir.vector<4:i32>>
+! CHECK-FIR: %[[cnvsrc:.*]] = fir.convert %[[vsrc]] : (vector<4xi32>) -> !fir.vector<4:i32>
+! CHECK-FIR: fir.store %[[cnvsrc]] to %[[trg]]
+
+! CHECK-LLVMIR: %[[arg1:.*]] = llvm.load %arg0 : !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[arg2:.*]] = llvm.load %arg1 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx:.*]] = llvm.load %arg3 : !llvm.ptr<i32>
+! CHECK-LLVMIR: %[[idx64:.*]] = llvm.sext %[[idx]] : i32 to i64
+! CHECK-LLVMIR: %[[one:.*]] = llvm.mlir.constant(1 : i64) : i64
+! CHECK-LLVMIR: %[[idx64m1:.*]] = llvm.sub %[[idx64]], %[[one]] : i64
+! CHECK-LLVMIR: %[[elem:.*]] = llvm.getelementptr %[[arg3:.*]][0, %[[idx64m1]]] : (!llvm.ptr<array<20 x vector<4xi32>>>, i64) -> !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: %[[elemptr:.*]] = llvm.bitcast %[[elem]] : !llvm.ptr<vector<4xi32>> to !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[addr:.*]] = llvm.getelementptr %[[elemptr]][%[[arg2]]] : (!llvm.ptr<i8>, i32) -> !llvm.ptr<i8>
+! CHECK-LLVMIR: %[[trg:.*]] = llvm.bitcast %[[addr]] : !llvm.ptr<i8> to !llvm.ptr<vector<4xi32>>
+! CHECK-LLVMIR: llvm.store %[[arg1]], %[[trg]]
+
+! CHECK: %[[arg1:.*]] = load <4 x i32>, ptr %0, align 16
+! CHECK: %[[arg2:.*]] = load i32, ptr %1, align 4
+! CHECK: %[[idx:.*]] = load i32, ptr %3, align 4
+! CHECK: %[[idx64:.*]] = sext i32 %[[idx]] to i64
+! CHECK: %[[idx64m1:.*]] = sub i64 %[[idx64]], 1
+! CHECK: %[[elem:.*]] = getelementptr [20 x <4 x i32>], ptr %2, i32 0, i64 %[[idx64m1]]
+! CHECK: %[[trg:.*]] = getelementptr i8, ptr %[[elem]], i32 %[[arg2]]
+! CHECK: store <4 x i32> %[[arg1]], ptr %[[trg]], align 1
+end subroutine vec_xstw4_test_vi4i4vai4
+
More information about the flang-commits
mailing list